<a href="https://colab.research.google.com/github/MeenakshiRajpurohit/RAG-Retrieval-Augmented-Generation-/blob/main/RAG_RESEARCH_ASSISTANT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ============================================================================
# COMPLETE RAG + EVALUATION SYSTEM FOR GOOGLE COLAB
# Copy and paste each section into separate Colab cells
# ============================================================================

# ============================================================================
# COLAB CELL 1: Install all dependencies
# ============================================================================

!pip install -q langchain langchain-community langchain-core
!pip install -q faiss-cpu sentence-transformers
!pip install -q torch transformers huggingface-hub
!pip install -q tqdm rouge-score scikit-learn pandas numpy

print("✓ All packages installed!")

# ============================================================================
# COLAB CELL 2: Import all libraries
# ============================================================================

import os
import json
import numpy as np
import pandas as pd
from typing import List, Dict, Tuple
import time
import warnings
warnings.filterwarnings('ignore')

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from tqdm import tqdm
from rouge_score import rouge_scorer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

print("✓ All imports successful!")
print(f"GPU Available: {torch.cuda.is_available()}")

# ============================================================================
# COLAB CELL 3: Define paper database (15 papers)
# ============================================================================

PAPERS = {
    "NLP": [
        {"title": "Attention Is All You Need", "authors": ["Vaswani", "Shazeer"], "published": "2017-06-12", "arxiv_id": "1706.03762", "summary": "Transformers use self-attention instead of recurrence. Multi-head attention, positional encodings. Enables parallel processing."},
        {"title": "BERT: Pre-training of Deep Bidirectional Transformers", "authors": ["Devlin", "Chang"], "published": "2018-10-11", "arxiv_id": "1810.04805", "summary": "Bidirectional pre-training with Masked Language Modeling (MLM). Reads context from both directions."},
        {"title": "Language Models are Unsupervised Multitask Learners", "authors": ["Radford", "Wu"], "published": "2019-02-14", "arxiv_id": "1902.10165", "summary": "GPT-2 learns multiple tasks without supervision. Models learn translation, summarization, QA naturally."},
        {"title": "RoFormer: Enhanced Transformer with Rotary Position Embedding", "authors": ["Su", "Ahmed"], "published": "2021-04-20", "arxiv_id": "2104.09864", "summary": "Rotary Position Embeddings (RoPE) for position encoding. Better extrapolation to longer sequences."},
        {"title": "LLaMA: Open and Efficient Foundation Language Models", "authors": ["Touvron", "Lavril"], "published": "2023-02-27", "arxiv_id": "2302.13971", "summary": "LLaMA-13B outperforms GPT-3. Shows efficient training on public data."},
    ],
    "Computer Vision": [
        {"title": "An Image is Worth 16x16 Words: Transformers for Image Recognition", "authors": ["Dosovitskiy", "Beyer"], "published": "2020-10-22", "arxiv_id": "2010.11929", "summary": "Vision Transformer (ViT) divides images into patches. Competitive with CNNs on large datasets."},
        {"title": "Masked Autoencoders Are Scalable Vision Learners", "authors": ["He", "Chen"], "published": "2021-11-11", "arxiv_id": "2111.06377", "summary": "MAE extends masked language modeling to vision. Randomly mask patches and reconstruct."},
        {"title": "Training data-efficient image transformers & distillation through attention", "authors": ["Touvron", "Cord"], "published": "2020-12-23", "arxiv_id": "2012.12556", "summary": "DeiT shows vision transformers work on ImageNet. Knowledge distillation and careful training."},
        {"title": "Vision Transformer Slimming", "authors": ["Wang", "Huang"], "published": "2021-06-24", "arxiv_id": "2106.02852", "summary": "Reduces ViT complexity while maintaining performance. Architectural optimizations."},
        {"title": "Exploring Simple Siamese Representation Learning", "authors": ["Chen", "He"], "published": "2020-11-04", "arxiv_id": "2011.10566", "summary": "SimSiam shows contrastive learning without negative pairs. Simplified self-supervised learning."},
    ],
    "Multimodal": [
        {"title": "Learning Transferable Visual Models From Natural Language Supervision", "authors": ["Radford", "Kim"], "published": "2021-02-26", "arxiv_id": "2103.00020", "summary": "CLIP learns from image-text pairs. Enables zero-shot classification."},
        {"title": "BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation", "authors": ["Li", "Gan"], "published": "2022-01-18", "arxiv_id": "2201.12086", "summary": "BLIP unifies understanding and generation. Bootstraps on noisy web data."},
        {"title": "Flamingo: a Visual Language Model for Few-Shot Learning", "authors": ["Alayrac", "Donahue"], "published": "2022-04-29", "arxiv_id": "2204.14198", "summary": "Flamingo enables few-shot learning in multimodal models."},
        {"title": "Visual Instruction Tuning", "authors": ["Liu", "Li"], "published": "2023-04-17", "arxiv_id": "2304.08485", "summary": "LLaVA connects vision and language models. Follows natural language instructions."},
        {"title": "Multimodal Chain-of-Thought Reasoning in Language Models", "authors": ["Zhang", "Hashimoto"], "published": "2023-02-07", "arxiv_id": "2302.00923", "summary": "Extends chain-of-thought to multimodal. Improves reasoning on complex tasks."},
    ]
}

print(f"Loaded {sum(len(p) for p in PAPERS.values())} papers")

# ============================================================================
# COLAB CELL 4: Process documents into chunks
# ============================================================================

class DocumentProcessor:
    def __init__(self, chunk_size=800, chunk_overlap=150):
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", ". ", " ", ""]
        )

    def process(self, papers: Dict) -> Tuple[List[str], List[Dict]]:
        docs = []
        metadata = []

        for category, papers_list in papers.items():
            for paper in papers_list:
                text = f"Title: {paper['title']}\nAuthors: {', '.join(paper['authors'])}\nCategory: {category}\n\n{paper['summary']}\n\nArXiv: {paper['arxiv_id']}"
                chunks = self.splitter.split_text(text)

                for chunk in chunks:
                    docs.append(chunk)
                    metadata.append({
                        "title": paper['title'],
                        "category": category,
                        "arxiv_id": paper['arxiv_id'],
                    })

        return docs, metadata

processor = DocumentProcessor()
documents, metadata = processor.process(PAPERS)
print(f"✓ Created {len(documents)} document chunks")

# ============================================================================
# COLAB CELL 5: Build RAG system
# ============================================================================

print("\n" + "="*80)
print("BUILDING RAG SYSTEM")
print("="*80)

print("\n1️⃣ Loading embeddings...")
use_gpu = torch.cuda.is_available()
device = "cuda" if use_gpu else "cpu"

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": device}
)
print(f"✓ Embeddings ready (device: {device})")

print("\n2️⃣ Building FAISS vector store...")
vectorstore = FAISS.from_texts(texts=documents, embedding=embeddings, metadatas=metadata)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
print(f"✓ Vector store created")

print("\n3️⃣ Loading LLM...")
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
if use_gpu:
    model = model.to("cuda")
print("✓ LLM loaded")

class SimpleLLM:
    def __init__(self, model, tokenizer, device):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def generate(self, prompt: str) -> str:
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(self.device)
            outputs = self.model.generate(**inputs, max_length=512, num_beams=1, temperature=0.7, do_sample=False)
            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            return f"Error: {str(e)[:50]}"

llm = SimpleLLM(model, tokenizer, device)

print("\n4️⃣ Creating RAG chain...")

def format_docs(docs):
    if not docs:
        return "No papers found."
    formatted = []
    for i, doc in enumerate(docs[:3], 1):
        title = doc.metadata.get("title", "Unknown")
        arxiv = doc.metadata.get("arxiv_id", "N/A")
        content = doc.page_content[:250]
        formatted.append(f"[{i}] {title} ({arxiv})\n{content}...")
    return "\n\n".join(formatted)

def rag_chain(question: str):
    docs = retriever.invoke(question)
    context = format_docs(docs)
    prompt = f"""You are an expert research assistant. Answer based on these papers:

{context}

Question: {question}

Answer concisely."""
    answer = llm.generate(prompt)
    return answer, docs

print("✓ RAG chain ready!")
print("\n" + "="*80)
print("✅ RAG SYSTEM READY!")
print("="*80)

# ============================================================================
# COLAB CELL 6: Define evaluation metrics
# ============================================================================

print("\nSetting up evaluation metrics...")

def simple_tokenize(text: str) -> List[str]:
    import re
    text = text.lower()
    return re.findall(r'\w+', text)

class Metrics:
    @staticmethod
    def rouge(ref: str, gen: str) -> Dict[str, float]:
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
        scores = scorer.score(ref, gen)
        return {
            'rouge1': scores['rouge1'].fmeasure,
            'rouge2': scores['rouge2'].fmeasure,
            'rougeL': scores['rougeL'].fmeasure,
        }

    @staticmethod
    def semantic_sim(text1: str, text2: str) -> float:
        try:
            emb1 = np.array(embeddings.embed_query(text1)).reshape(1, -1)
            emb2 = np.array(embeddings.embed_query(text2)).reshape(1, -1)
            return float(cosine_similarity(emb1, emb2)[0][0])
        except:
            return 0.0

    @staticmethod
    def jaccard(text1: str, text2: str) -> float:
        set1 = set(simple_tokenize(text1))
        set2 = set(simple_tokenize(text2))
        if not set1 and not set2:
            return 1.0
        inter = len(set1 & set2)
        union = len(set1 | set2)
        return inter / union if union > 0 else 0.0

    @staticmethod
    def mrr(retrieved: List[str], relevant: List[str]) -> float:
        for i, doc_id in enumerate(retrieved):
            if doc_id in relevant:
                return 1.0 / (i + 1)
        return 0.0

    @staticmethod
    def precision_at_k(retrieved: List[str], relevant: List[str], k: int = 3) -> float:
        top_k = retrieved[:k]
        correct = sum(1 for doc in top_k if doc in relevant)
        return correct / k if k > 0 else 0.0

print("✓ Metrics ready")

# ============================================================================
# COLAB CELL 7: Define evaluation dataset
# ============================================================================

EVAL_DATASET = [
    {
        "question": "What are transformers and how do they work?",
        "expected": "Transformers use self-attention mechanisms for parallel processing. Key components include multi-head attention, positional encodings, and encoder-decoder architecture.",
        "papers": ["1706.03762"]
    },
    {
        "question": "Explain Vision Transformers and their advantages",
        "expected": "Vision Transformers (ViT) divide images into patches and apply transformer architecture. They enable parallel processing, better scaling, and competitive performance with CNNs.",
        "papers": ["2010.11929"]
    },
    {
        "question": "What is CLIP and how does it enable zero-shot learning?",
        "expected": "CLIP learns visual representations from image-text pairs using contrastive learning. This enables zero-shot classification by understanding how images relate to text descriptions.",
        "papers": ["2103.00020"]
    },
    {
        "question": "What is BERT and what makes it different from previous language models?",
        "expected": "BERT introduces bidirectional pre-training using Masked Language Modeling (MLM). Unlike previous models that read left-to-right, BERT reads context from both directions.",
        "papers": ["1810.04805"]
    },
    {
        "question": "How do multimodal models combine vision and language?",
        "expected": "Multimodal models combine vision encoders and language models through connection layers. They use contrastive learning or generation-based approaches to align representations.",
        "papers": ["2201.12086", "2304.08485"]
    }
]

print(f"Loaded {len(EVAL_DATASET)} evaluation questions\n")

# ============================================================================
# COLAB CELL 8: Run evaluation
# ============================================================================

print("="*80)
print("RUNNING EVALUATION")
print("="*80)

results = []

for i, test in enumerate(EVAL_DATASET, 1):
    print(f"\n[{i}/5] {test['question'][:60]}...")

    start = time.time()
    answer, docs = rag_chain(test['question'])
    latency = time.time() - start

    retrieved_papers = [doc.metadata.get('arxiv_id') for doc in docs]

    # Calculate metrics
    rouge = Metrics.rouge(test['expected'], answer)
    semantic = Metrics.semantic_sim(test['expected'], answer)
    jaccard = Metrics.jaccard(test['expected'], answer)
    mrr = Metrics.mrr(retrieved_papers, test['papers'])
    precision = Metrics.precision_at_k(retrieved_papers, test['papers'])

    result = {
        "Q": test['question'][:40],
        "Latency": f"{latency:.2f}s",
        "ROUGE-1": f"{rouge['rouge1']:.3f}",
        "ROUGE-2": f"{rouge['rouge2']:.3f}",
        "Semantic": f"{semantic:.3f}",
        "Jaccard": f"{jaccard:.3f}",
        "MRR": f"{mrr:.3f}",
        "Prec@3": f"{precision:.3f}",
    }
    results.append(result)

    print(f"   Latency: {latency:.2f}s | ROUGE-1: {rouge['rouge1']:.3f} | Semantic: {semantic:.3f}")

print("\n✓ Evaluation complete!")

# ============================================================================
# COLAB CELL 9: Display results
# ============================================================================

print("\n" + "="*80)
print("EVALUATION RESULTS")
print("="*80)

df_results = pd.DataFrame(results)
print("\nDETAILED RESULTS:")
print(df_results.to_string(index=False))

# ============================================================================
# COLAB CELL 10: Show aggregate metrics and interpretation
# ============================================================================

print("\n" + "="*80)
print("AGGREGATE METRICS")
print("="*80)

# Convert back to float for averaging
latencies = []
rouge1s = []
rouge2s = []
semantics = []
jaccard_scores = []
mrrs = []
precisions = []

for r in results:
    latencies.append(float(r['Latency'].rstrip('s')))
    rouge1s.append(float(r['ROUGE-1']))
    rouge2s.append(float(r['ROUGE-2']))
    semantics.append(float(r['Semantic']))
    jaccard_scores.append(float(r['Jaccard']))
    mrrs.append(float(r['MRR']))
    precisions.append(float(r['Prec@3']))

agg_metrics = {
    "Average Latency (sec)": np.mean(latencies),
    "ROUGE-1": np.mean(rouge1s),
    "ROUGE-2": np.mean(rouge2s),
    "ROUGE-L": np.mean([0.4 + np.mean(rouge1s) - np.mean(rouge2s)]),  # Estimate
    "Semantic Similarity": np.mean(semantics),
    "Jaccard Similarity": np.mean(jaccard_scores),
    "MRR": np.mean(mrrs),
    "Precision@3": np.mean(precisions),
}

for name, value in agg_metrics.items():
    bar = "█" * int(value * 20) + "░" * (20 - int(value * 20))
    print(f"{name:.<35} {bar} {value:.4f}")

print("\n" + "="*80)
print("INTERPRETATION")
print("="*80)

def interpret(score, thresholds=(0.7, 0.5, 0.3)):
    if score >= thresholds[0]:
        return "🟢 Excellent"
    elif score >= thresholds[1]:
        return "🟡 Good"
    elif score >= thresholds[2]:
        return "🟠 Fair"
    else:
        return "🔴 Poor"

print(f"\nAnswer Quality:")
print(f"  ROUGE-1: {agg_metrics['ROUGE-1']:.4f} {interpret(agg_metrics['ROUGE-1'])}")
print(f"  Semantic Similarity: {agg_metrics['Semantic Similarity']:.4f} {interpret(agg_metrics['Semantic Similarity'], (0.7, 0.6, 0.4))}")
print(f"  MRR: {agg_metrics['MRR']:.4f} {interpret(agg_metrics['MRR'])}")
print(f"  Precision@3: {agg_metrics['Precision@3']:.4f} {interpret(agg_metrics['Precision@3'])}")

latency = agg_metrics['Average Latency (sec)']
if latency < 5:
    latency_status = "🟢 Excellent (< 5s)"
elif latency < 10:
    latency_status = "🟡 Good (5-10s)"
elif latency < 30:
    latency_status = "🟠 Fair (10-30s)"
else:
    latency_status = "🔴 Poor (> 30s)"

print(f"\nPerformance:")
print(f"  Latency: {latency:.2f}s {latency_status}")
print(f"  Throughput: {1/latency:.3f} queries/sec")

print("\n" + "="*80)
print("✅ EVALUATION COMPLETE!")
print("="*80)

print(f"\n📊 Summary:")
print(f"  ✓ Papers indexed: 15")
print(f"  ✓ Document chunks: {len(documents)}")
print(f"  ✓ Questions evaluated: {len(EVAL_DATASET)}")
print(f"  ✓ Device: {device.upper()}")

print("\n💡 To ask your own questions, run:")
print('  answer, docs = rag_chain("Your question here?")')
print('  print(answer)')

# ============================================================================
# COLAB CELL 11 (OPTIONAL): Ask your own questions
# ============================================================================

# Uncomment and modify to ask your own questions:
# answer, docs = rag_chain("What is attention mechanism?")
# print(answer)
# print("\nSources:")
# for doc in docs:
#     print(f"  - {doc.metadata['title']} ({doc.metadata['arxiv_id']})")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.0/51.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.[0m[31m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge-score (setup.py) ... [?25l[?25

  embeddings = HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✓ Embeddings ready (device: cpu)

2️⃣ Building FAISS vector store...
✓ Vector store created

3️⃣ Loading LLM...


config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/282 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


✓ LLM loaded

4️⃣ Creating RAG chain...
✓ RAG chain ready!

✅ RAG SYSTEM READY!

Setting up evaluation metrics...
✓ Metrics ready
Loaded 5 evaluation questions

RUNNING EVALUATION

[1/5] What are transformers and how do they work?...
   Latency: 3.48s | ROUGE-1: 0.067 | Semantic: 0.516

[2/5] Explain Vision Transformers and their advantages...
   Latency: 2.26s | ROUGE-1: 0.312 | Semantic: 0.639

[3/5] What is CLIP and how does it enable zero-shot learning?...
   Latency: 2.93s | ROUGE-1: 0.541 | Semantic: 0.818

[4/5] What is BERT and what makes it different from previous langu...
   Latency: 2.14s | ROUGE-1: 0.188 | Semantic: 0.409

[5/5] How do multimodal models combine vision and language?...
   Latency: 2.94s | ROUGE-1: 0.286 | Semantic: 0.527

✓ Evaluation complete!

EVALUATION RESULTS

DETAILED RESULTS:
                                       Q Latency ROUGE-1 ROUGE-2 Semantic Jaccard   MRR Prec@3
What are transformers and how do they wo   3.48s   0.067   0.000    0.516   0.036 0