<a href="https://colab.research.google.com/github/Deep-Dey1/MISTRAL-RAG/blob/main/Result_analysis_finetuning_vs_rag_based_on_mistral_7b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 1: Clear GPU & System RAM

In [6]:
import torch
import gc

# Run garbage collection first: tensors that are only kept alive by reference
# cycles still occupy GPU memory until they are collected, so the allocator
# cannot give that memory back before this step.
gc.collect()

# Release the now-unused blocks held by PyTorch's CUDA caching allocator
torch.cuda.empty_cache()

# Optional hard reset. The command below kills every process in the runtime,
# which makes Colab restart the session. It is left commented out so that
# "Run all" does not terminate the session before the remaining steps execute;
# uncomment and run it by hand only when a completely fresh runtime is needed.
# !kill -9 -1  # WARNING: This will restart your Colab session.

Step 2: Install Dependencies

In [1]:
!pip install torch transformers datasets peft sentence-transformers faiss-cpu wandb matplotlib scikit-learn


Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from tor

Step 3: Load 20 Queries from the Alpaca Dataset

In [2]:
from datasets import load_dataset

# Fetch the training split of the Alpaca instruction dataset
dataset = load_dataset("tatsu-lab/alpaca", split="train")

# Draw a fixed evaluation sample: shuffle with a constant seed, keep the first 20 rows
eval_data = (
    dataset
    .shuffle(seed=42)
    .select(range(20))
)

# One prompt per sampled row: instruction and input wrapped in the
# "###Human: ... ###Assistant:" template
eval_prompts = [
    f"###Human: {ex['instruction']} {ex['input']} ###Assistant:"
    for ex in eval_data
]

# Reference answers, in the same row order as eval_prompts
ground_truth_responses = list(eval_data["output"])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.47k [00:00<?, ?B/s]

(…)-00000-of-00001-a09b74b3ef9c3b56.parquet:   0%|          | 0.00/24.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/52002 [00:00<?, ? examples/s]

Step 4: Load Tokenizer & Finetuned Model

In [4]:
# Interactive Hugging Face Hub login: the CLI prompts for an access token
# (typed input is hidden) and stores it for later Hub requests.
! huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
The token `mistral-rag-push` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-a

In [None]:
import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM

# Load the tokenizer stored in the fine-tuned model repository
tokenizer = AutoTokenizer.from_pretrained("deep0210/mistral-finetuned-alpaca")

# Load finetuned model with PEFT (fixing LoRA mismatch).
# - device_map="auto" already places the weights on the available device(s),
#   so there is no follow-up .to("cuda"): moving a dispatched model is
#   redundant and raises once any layer has been offloaded to CPU/disk.
# - torch_dtype=torch.float16 halves the memory footprint compared with the
#   default float32 load, which a 7B-parameter model needs on typical Colab GPUs.
model_finetuned = AutoPeftModelForCausalLM.from_pretrained(
    "deep0210/mistral-finetuned-alpaca",
    device_map="auto",
    torch_dtype=torch.float16,
)


Step 5: Generate Responses from the Finetuned Model

In [None]:
def generate_responses(model, tokenizer, prompts, max_new_tokens=200):
    """Generate one completion per prompt and return only the newly generated text.

    Args:
        model: causal language model exposing ``eval()``, ``parameters()`` and
            ``generate(**inputs, max_new_tokens=...)``.
        tokenizer: tokenizer matching ``model``; called as
            ``tokenizer(prompt, return_tensors="pt")`` and used for decoding.
        prompts: iterable of prompt strings.
        max_new_tokens: upper bound on the number of tokens generated per
            prompt. The prompt length does not count against this budget, so
            long prompts are not left without room for an answer.

    Returns:
        list[str]: one completion per prompt, in prompt order. The prompt text
        is not included, so the result can be compared directly with reference
        answers.
    """
    model.eval()
    # Send inputs to wherever the model's first parameters live instead of
    # assuming a device name.
    device = next(model.parameters()).device
    responses = []

    for prompt in prompts:
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        prompt_length = inputs["input_ids"].shape[1]

        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=max_new_tokens)

        # generate() returns prompt + continuation; keep the continuation only,
        # otherwise the echoed prompt is scored as if the model had written it.
        completion_ids = output[0][prompt_length:]
        generated_text = tokenizer.decode(completion_ids, skip_special_tokens=True)
        responses.append(generated_text.strip())

    return responses

# Get finetuned model responses: run every evaluation prompt through the
# fine-tuned model; the result holds one response string per prompt, in prompt order
finetuned_responses = generate_responses(model_finetuned, tokenizer, eval_prompts)


Step 6: Compute Performance Metrics

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from datasets import load_metric
import numpy as np

# Load evaluation models: the sacreBLEU and ROUGE scorers plus the sentence
# embedding model used for the cosine-similarity metric.
# NOTE(review): `load_metric` has been deprecated in favour of the separate
# `evaluate` package and appears to be missing from recent `datasets` releases —
# confirm the import still works with the installed version, or pin `datasets`.
bleu = load_metric("sacrebleu")
rouge = load_metric("rouge")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Function to compute metrics
def evaluate_model(generated, reference):
    """Score generated texts against their reference texts.

    Args:
        generated: sequence of model outputs (strings).
        reference: sequence of ground-truth strings, aligned by position with
            ``generated``.

    Returns:
        dict with three scores:
            "BLEU": corpus sacreBLEU score rescaled from 0-100 to 0-1, so it
                shares a scale with the other two entries.
            "ROUGE-L": the ``mid`` aggregate F-measure of ROUGE-L.
            "Cosine Similarity": mean cosine similarity between the sentence
                embeddings of each generated/reference pair.

    Raises:
        ValueError: if the inputs are empty or differ in length (pairing them
            with ``zip`` would otherwise silently drop the unmatched items).
    """
    generated = list(generated)
    reference = list(reference)
    if len(generated) != len(reference):
        raise ValueError(
            f"generated and reference must have the same length "
            f"({len(generated)} != {len(reference)})"
        )
    if not generated:
        raise ValueError("generated and reference must not be empty")

    # Compute BLEU: sacreBLEU takes a list of references per prediction and
    # reports a score in the 0-100 range, hence the division.
    bleu_score = bleu.compute(predictions=generated, references=[[r] for r in reference])["score"] / 100.0

    # Compute ROUGE
    rouge_scores = rouge.compute(predictions=generated, references=reference)

    # Compute Cosine Similarity using embeddings, one value per aligned pair
    gen_embeddings = embedder.encode(generated)
    ref_embeddings = embedder.encode(reference)
    pair_similarities = [
        cosine_similarity([g], [r])[0][0] for g, r in zip(gen_embeddings, ref_embeddings)
    ]
    # Plain float keeps the printed metrics free of numpy scalar wrappers
    cosine_sim = float(np.mean(pair_similarities))

    return {
        "BLEU": bleu_score,
        "ROUGE-L": rouge_scores["rougeL"].mid.fmeasure,
        "Cosine Similarity": cosine_sim
    }

# Evaluate finetuned model: score its responses against the ground-truth answers
# and print the resulting metric dictionary
finetuned_metrics = evaluate_model(finetuned_responses, ground_truth_responses)
print(f"Finetuned Model Metrics: {finetuned_metrics}")


Step 7: Plot Performance Graphs

In [None]:
import matplotlib.pyplot as plt

# Plot function
def plot_metrics(metrics, title):
    """Show a bar chart with one bar per metric.

    Args:
        metrics: mapping of metric name to numeric score; names label the
            x axis and scores set the bar heights.
        title: text displayed above the chart.
    """
    names = list(metrics)
    scores = [metrics[name] for name in names]

    _, ax = plt.subplots(figsize=(8, 5))
    ax.bar(names, scores, color=['blue', 'green', 'red'])
    ax.set_xlabel("Metric")
    ax.set_ylabel("Score")
    ax.set_title(title)
    ax.set_ylim(0, 1)  # y axis fixed to the 0-1 range
    plt.show()

# Plot finetuned model metrics as a bar chart (one bar per metric)
plot_metrics(finetuned_metrics, "Finetuned Mistral Model Performance")


Step 8: Repeat for RAG Model

In [None]:
# AutoModelForCausalLM is not imported by the cells above (only AutoTokenizer
# is), so import it here to avoid a NameError.
from transformers import AutoModelForCausalLM

# Load RAG model. device_map="auto" already places the weights on the available
# device(s), so no extra .to("cuda") is applied; float16 halves the memory a
# 7B-parameter model needs compared with the default float32 load.
model_rag = AutoModelForCausalLM.from_pretrained(
    "deep0210/mistral-rag",
    device_map="auto",
    torch_dtype=torch.float16,
)

# Generate responses from RAG model (same prompts and tokenizer as the fine-tuned run)
rag_responses = generate_responses(model_rag, tokenizer, eval_prompts)

# Evaluate RAG model
rag_metrics = evaluate_model(rag_responses, ground_truth_responses)
print(f"RAG Model Metrics: {rag_metrics}")

# Plot RAG model performance
plot_metrics(rag_metrics, "RAG Mistral Model Performance")


Step 9: Compare Both Models

In [None]:
# Side-by-side comparison: metric names come from the fine-tuned results,
# scores are taken from each model's metric dictionary in stored order
labels = list(finetuned_metrics.keys())
finetuned_scores = list(finetuned_metrics.values())
rag_scores = list(rag_metrics.values())

# One slot per metric on the x axis; each slot holds two bars of this width
x = np.arange(len(labels))
width = 0.35

fig, ax = plt.subplots(figsize=(8, 5))

# Shift the two series half a bar left/right so they sit next to each other
rects1 = ax.bar(x - width/2, finetuned_scores, width, label="Finetuned")
rects2 = ax.bar(x + width/2, rag_scores, width, label="RAG")

ax.set(
    xlabel="Metric",
    ylabel="Score",
    title="Comparison of Finetuned vs RAG Model Performance",
)
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.show()
