In [3]:
# Install Python libraries for fine-tuning and evaluation
!pip install mistralai matplotlib nltk rouge-score
!pip install rouge-score



In [4]:
import os
import json
import random
import time
import matplotlib.pyplot as plt
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from mistralai import Mistral

# Mock retriever function (replace with an actual retriever for real use)
def retrieve_context(prompt):
    """
    Mock retrieval function: Simulates retrieving relevant documents based on the input prompt.
    Replace this with a proper retrieval system (e.g., FAISS, Pinecone, Elasticsearch).
    """
    knowledge_base = [
        "Eat a balanced diet with lean protein, complex carbs, and healthy fats.",
        "Consider chicken, eggs, quinoa, and avocado for muscle gain.",
        "Include strength training exercises for optimal results.",
    ]
    # For simplicity, just return the first result
    return knowledge_base[0]

# API Key and Model
api_key = "0gEpvMu7NKCEYwuaL595Bebh6Dwrjgkf"
model = "mistral-large-latest"
client = Mistral(api_key=api_key)

# File paths
input_file = "/kaggle/input/dataset-hybrid-gen/dataset2.jsonl"
training_file = "/kaggle/working/training_data.jsonl"
validation_file = "/kaggle/working/validation_data.jsonl"

# Read and prepare the dataset
with open(input_file, "r") as f:
    lines = f.readlines()

random.shuffle(lines)

# Split into training (90%) and validation (10%)
split_idx = int(0.9 * len(lines))
training_lines = lines[:split_idx]
validation_lines = lines[split_idx:]

with open(training_file, "w") as f_train:
    f_train.writelines(training_lines)
with open(validation_file, "w") as f_valid:
    f_valid.writelines(validation_lines)

print("Training and validation datasets created successfully!")

# Upload training and validation files
training_data = client.files.upload(
    file={"file_name": training_file, "content": open(training_file, "rb")}
)
validation_data = client.files.upload(
    file={"file_name": validation_file, "content": open(validation_file, "rb")}
)
print("Training File ID:", training_data.id)
print("Validation File ID:", validation_data.id)

# Fine-tuning job
created_jobs = client.fine_tuning.jobs.create(
    model="open-mistral-7b",
    training_files=[{"file_id": training_data.id, "weight": 1}],
    validation_files=[validation_data.id],
    hyperparameters={"training_steps": 10, "learning_rate": 0.0001},
    auto_start=True,
)

# Poll for job status
job_id = created_jobs.id
while True:
    job_details = client.fine_tuning.jobs.get(job_id=job_id)
    status = job_details.status
    print(f"Job Status: {status}")
    if status == "SUCCESS":
        break
    elif status in ["FAILED_VALIDATION", "CANCELLED"]:
        print("Job failed. Details:", job_details)
        raise Exception(f"Job {job_id} has status {status}")
    time.sleep(10)

fine_tuned_model_id = job_details.fine_tuned_model
print(f"Fine-tuned model ID: {fine_tuned_model_id}")

# RAG + Completion
test_prompt = "I am a 25-year-old male looking to gain muscle. What should I eat?"
retrieved_context = retrieve_context(test_prompt)  # Retrieval step
augmented_prompt = f"Context: {retrieved_context}\nQuestion: {test_prompt}"
chat_response = client.chat.complete(
    model=fine_tuned_model_id,
    messages=[{"role": "user", "content": augmented_prompt}],
)
response = chat_response.choices[0].message.content
print("Response:", response)



Training and validation datasets created successfully!
Training File ID: 987cb53e-a6f1-4412-9ac1-226d6951c767
Validation File ID: 802e00fd-2092-4a59-8698-b12df60554e0
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: RUNNING
Job Status: SUCCESS
Fine-tuned model ID: ft:open-mistral-7b:0e2425c9:20241207:61369689
Response: For gaining muscle, you should focus on high-protein foods such as lean meats, fish, beans, lentils, nuts, and seeds. Complex carbohydrates like whole grains, brown rice, and sweet potatoes provide the energy needed for muscle growth. Include healthy fats like avocados, olive oil, and nuts in your diet. Additionally, stay hydrated by drinking enough water throughout the day. Consult a registered dietitian or healthcare professional for personalized advice based on your specific health needs.


In [9]:
!pip install bert-score sentence-transformers rouge-score nltk

from bert_score import score as bert_score
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Initialize necessary models
bert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
rouge_scorer_instance = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

# Example data (replace these with your real inputs)
ground_truths = [
    "The quick brown fox jumps over the lazy dog.",
    "Artificial intelligence is transforming industries.",
]
generated_responses = [
    "The fast brown fox leaps over the idle dog.",
    "AI is changing various fields.",
]

# Compute embeddings for cosine similarity
ground_embeddings = bert_model.encode(ground_truths)
generated_embeddings = bert_model.encode(generated_responses)

# BLEU and ROUGE scores
bleu_scores = [
    sentence_bleu([gt.split()], gen.split()) for gt, gen in zip(ground_truths, generated_responses)
]
rouge_1_scores = [
    rouge_scorer_instance.score(gen, gt)['rouge1'].fmeasure for gt, gen in zip(ground_truths, generated_responses)
]

# Cosine similarities
similarities = cosine_similarity(ground_embeddings, generated_embeddings)

# Compute average scores
average_bleu = sum(bleu_scores) / len(bleu_scores)
average_rouge_1 = sum(rouge_1_scores) / len(rouge_1_scores)
average_cosine_similarity = similarities.diagonal().mean()

# Display results
print(f"Average BLEU: {average_bleu:.4f}")
print(f"Average ROUGE-1: {average_rouge_1:.4f}")
print(f"Average Cosine Similarity: {average_cosine_similarity:.4f}")


Collecting sentence-transformers
  Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.3.1-py3-none-any.whl (268 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.3.1


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Average BLEU: 0.6538
Average ROUGE-1: 0.4333
Average Cosine Similarity: 0.7853


Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
