# Week 11: RAG Systems

Building a Retrieval-Augmented Generation system from scratch.

## Learning Objectives
1. Implement a Dense Retriever (Embeddings + Cosine Similarity)
2. Build a Chunking Strategy
3. Create a Generator loop

In [None]:
import numpy as np
from typing import List, Dict, Any

## 1. Document Chunking

A simple sliding-window chunker: it splits text on whitespace and emits overlapping windows of words.

In [None]:
def chunk_text(text: str, chunk_size: int = 100, overlap: int = 20) -> List[str]:
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one.

    Args:
        text: Input text; it is tokenized with ``str.split()``.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared by consecutive chunks. Must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in document order, each joined with single spaces.
        An empty or whitespace-only ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        # overlap >= chunk_size would make the window step zero or negative.
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []

    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start : start + chunk_size]))
        # Once a window reaches the last word, any further window would only
        # contain words already present in this chunk, so stop here.
        if start + chunk_size >= len(words):
            break

    return chunks

# Toy corpus: one short passage per document.
docs = [
    "Machine learning is a field of inquiry devoted to understanding and building methods that 'learn'",
    "Deep learning is part of a broader family of machine learning methods based on artificial neural networks",
    "Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability",
]

# Chunk each document on its own (so no chunk spans two documents) and pool
# the resulting pieces into one flat list.
all_chunks = []
for doc in docs:
    all_chunks += chunk_text(doc, chunk_size=10, overlap=2)

## 2. Vector Database (Mock)

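Using NumPy for dense retrieval. The embeddings here are random placeholder vectors, so the similarity scores are not semantically meaningful.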

In [None]:
class VectorStore:
    """In-memory mock vector store backed by a NumPy matrix.

    Embeddings are random unit vectors (a stand-in for a real embedding
    model), so search results are NOT semantically meaningful; the class only
    demonstrates the mechanics of dense retrieval.

    Attributes:
        dim: Dimensionality of every embedding vector.
        embeddings: Array of shape ``(n_documents, dim)`` with one
            L2-normalized row per stored document.
        documents: Stored texts; ``documents[i]`` belongs to ``embeddings[i]``.
    """

    def __init__(self, dim: int = 128):
        """Create an empty store whose vectors have ``dim`` components.

        Raises:
            ValueError: If ``dim`` is not positive.
        """
        if dim <= 0:
            raise ValueError("dim must be a positive integer")
        self.dim = dim
        # Start from a (0, dim) array so `embeddings` is always an ndarray and
        # searching an empty store is well defined.
        self.embeddings = np.empty((0, dim))
        self.documents = []

    def add(self, docs: List[str]) -> None:
        """Append ``docs`` to the store, giving each a random unit embedding."""
        docs = list(docs)
        if not docs:
            return

        # Mock embeddings: random vectors for demo
        new_embs = np.random.randn(len(docs), self.dim)
        # Normalize rows so a plain dot product equals cosine similarity.
        new_embs /= np.linalg.norm(new_embs, axis=1, keepdims=True)

        self.documents.extend(docs)
        self.embeddings = np.vstack([self.embeddings, new_embs])

    def search(self, query: str, k: int = 3) -> List[tuple]:
        """Return up to ``k`` ``(document, score)`` pairs, best score first.

        ``query`` is not actually embedded: a random unit vector stands in
        for its embedding, so the ranking is arbitrary. An empty store or a
        non-positive ``k`` yields an empty list.
        """
        if k <= 0 or not self.documents:
            return []

        # Mock query embedding
        q_emb = np.random.randn(self.dim)
        q_emb /= np.linalg.norm(q_emb)

        # Cosine similarity (both sides are unit-length).
        scores = self.embeddings @ q_emb
        # argsort is ascending; reverse it and keep the k highest scores.
        top_indices = np.argsort(scores)[::-1][:k]

        return [(self.documents[i], scores[i]) for i in top_indices]

In [None]:
# Retrieval smoke test: index every chunk, then print the top hits for one
# query as "[score] text". search() uses its default k=3.
# NOTE: VectorStore uses random mock embeddings, so the ranking printed here
# is arbitrary and changes between runs.
store = VectorStore()
store.add(all_chunks)

results = store.search("What is machine learning?")
for doc, score in results:
    print(f"[{score:.4f}] {doc}")

## 3. RAG Pipeline

Retrieve -> Augment -> Generate

In [None]:
def rag_generate(query: str, vector_store: Any, k: int = 2) -> str:
    """Run a mock Retrieve -> Augment -> Generate pipeline for ``query``.

    Args:
        query: The user question.
        vector_store: Any object with a ``search(query, k=...)`` method that
            returns an iterable of ``(document, score)`` pairs.
        k: Number of documents to retrieve as context (default 2).

    Returns:
        A placeholder answer string that embeds the first 50 characters of
        the augmented prompt; no language model is called.
    """
    # 1. Retrieve: keep only the texts; scores are not used in the prompt.
    retrieved = vector_store.search(query, k=k)
    context = "\n".join(doc for doc, _ in retrieved)

    # 2. Augment: put the retrieved context in front of the question.
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

    # 3. Generate (Mock)
    # In real world: output = model.generate(prompt)
    return f"[LLM Generated Answer based on] {prompt[:50]}..."

# End-to-end demo: run the mock pipeline against the store built above and
# print the placeholder answer.
print(rag_generate("Explain Python", store))