# RAG Demo - Reproducible End-to-End

This notebook demonstrates the retrieval half of a RAG pipeline end to end (response generation with an LLM is left as a next step):
1. Load sample dataset
2. Compute embeddings
3. Build FAISS index
4. Run sample queries
5. Visualize results
6. Save the index (optional)

In [None]:
# Install dependencies (if running in Colab)
!pip install sentence-transformers faiss-cpu pandas numpy matplotlib

In [None]:
import json
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import matplotlib.pyplot as plt

## 1. Load Sample Dataset

In [None]:
# Read the sample corpus into a DataFrame, report its size, and preview it
df = pd.read_csv('../data/sample/documents.csv')
n_documents = len(df)
print(f"Loaded {n_documents} documents")
df.head()

## 2. Compute Embeddings

In [None]:
# Instantiate the sentence-transformers encoder used for documents and queries
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embedding_dim = model.get_sentence_embedding_dimension()
print(f"Model: {model}")
print(f"Embedding dimension: {embedding_dim}")

In [None]:
# Embed the `text` column of the loaded documents with the model
texts = df['text'].to_list()
embeddings = model.encode(texts, show_progress_bar=True)
embeddings_shape = embeddings.shape
print(f"Embeddings shape: {embeddings_shape}")

## 3. Build FAISS Index

In [None]:
# Build a flat L2 index sized to the embedding width and add every vector to it
vectors = np.asarray(embeddings, dtype=np.float32)  # the index is fed float32 data
dimension = vectors.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(vectors)
print(f"Index contains {index.ntotal} vectors")

## 4. Run Sample Queries

In [None]:
def search(query, k=5):
    """Search the FAISS index for the documents closest to a query.

    The query is embedded with the notebook-level ``model``, looked up in the
    notebook-level ``index``, and each hit is mapped back to a row of ``df``
    by position.

    Args:
        query: Query text to embed and search for.
        k: Maximum number of results to return.

    Returns:
        A list of at most ``k`` dicts, in the order returned by
        ``index.search``. Each dict has the keys ``title``, ``text``,
        ``distance`` (the value reported by the index) and ``similarity``
        (``1 / (1 + distance)``). Fewer than ``k`` dicts are returned when
        the index cannot supply ``k`` neighbours.
    """
    # Encode a one-element batch, then re-wrap the single vector as a
    # (1, dim) float32 array, which is the shape passed to index.search.
    query_embedding = model.encode([query])[0]
    query_embedding = np.array([query_embedding], dtype=np.float32)

    distances, indices = index.search(query_embedding, k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        # FAISS pads the result with label -1 when it finds fewer than k
        # neighbours. df.iloc[-1] would silently return the LAST document
        # as a bogus hit, so padded slots are skipped.
        if idx < 0:
            continue

        row = df.iloc[int(idx)]
        distance = float(dist)
        results.append({
            'title': row['title'],
            'text': row['text'],
            'distance': distance,
            'similarity': 1 / (1 + distance)  # Convert distance to similarity
        })

    return results

In [None]:
# Query 1: What is RAG?
query1 = "What is RAG?"
results1 = search(query1, k=3)

# Print the query, then one ranked entry per hit with a 100-character preview
print(f"Query: {query1}\n")
for rank, hit in enumerate(results1, start=1):
    preview = hit['text'][:100]
    print(f"{rank}. {hit['title']} (similarity: {hit['similarity']:.3f})")
    print(f"   {preview}...\n")

In [None]:
# Query 2: How do agents work?
query2 = "How do agents work?"
results2 = search(query2, k=3)

# Print the query, then one ranked entry per hit with a 100-character preview
print(f"Query: {query2}\n")
for rank, hit in enumerate(results2, start=1):
    preview = hit['text'][:100]
    print(f"{rank}. {hit['title']} (similarity: {hit['similarity']:.3f})")
    print(f"   {preview}...\n")

In [None]:
# Query 3: OpenRouter API
query3 = "Tell me about OpenRouter"
results3 = search(query3, k=3)

# Print the query, then one ranked entry per hit with a 100-character preview
print(f"Query: {query3}\n")
for rank, hit in enumerate(results3, start=1):
    preview = hit['text'][:100]
    print(f"{rank}. {hit['title']} (similarity: {hit['similarity']:.3f})")
    print(f"   {preview}...\n")

## 5. Visualize Results

In [None]:
# Visualize similarity scores: one horizontal bar chart per query,
# one bar per retrieved document.
queries = [query1, query2, query3]
all_results = [results1, results2, results3]

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

for idx, (query, results) in enumerate(zip(queries, all_results)):
    ax = axes[idx]
    titles = [r['title'][:20] for r in results]
    similarities = [r['similarity'] for r in results]

    # Plot against integer positions rather than the label strings. Passing
    # the strings as y-values makes them categories, so two titles sharing
    # the same 20-character prefix would be drawn on top of each other.
    positions = list(range(len(results)))
    ax.barh(positions, similarities, color='skyblue')
    ax.set_yticks(positions)
    ax.set_yticklabels(titles)
    # barh places the first entry at the bottom; flip the axis so the first
    # result of each query is shown at the top.
    ax.invert_yaxis()

    ax.set_xlabel('Similarity Score')
    # Only add an ellipsis when the query text was actually cut off.
    shown_query = query if len(query) <= 30 else f"{query[:30]}..."
    ax.set_title(f"Query: {shown_query}")
    ax.set_xlim(0, 1)

plt.tight_layout()
plt.show()

## 6. Save Index (Optional)

In [None]:
# Persist the FAISS index to disk, creating the target folder if needed
import os

index_dir = '../data/sample_embeddings'
index_path = '../data/sample_embeddings/index.faiss'

os.makedirs(index_dir, exist_ok=True)
faiss.write_index(index, index_path)
print("✓ Index saved to ../data/sample_embeddings/index.faiss")

## Summary

This notebook demonstrated:
- ✅ Loading a sample dataset
- ✅ Computing embeddings with sentence-transformers
- ✅ Building a FAISS index for efficient search
- ✅ Running semantic search queries
- ✅ Visualizing similarity scores

Next steps:
- Integrate with LLM for response generation
- Add re-ranking for improved relevance
- Scale to larger datasets