In [21]:
# Install required packages
!pip install sentence-transformers chromadb pdfplumber pypdf pandas numpy \
    torch transformers accelerate bitsandbytes pillow



In [2]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Dict, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

# PDF processing
import pdfplumber
from pypdf import PdfReader

# Embeddings
from sentence_transformers import SentenceTransformer
import torch

# Vector store
import chromadb
from chromadb.config import Settings

print("‚úÖ All imports successful!")
print(f"‚úÖ Device: {'GPU (CUDA)' if torch.cuda.is_available() else 'CPU'}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")

‚úÖ All imports successful!
‚úÖ Device: CPU


In [3]:
print("Loading free embedding model...")
print("(First run will download the model - about 90MB)\n")

# Choose your embedding model:
# 'all-MiniLM-L6-v2' - Fast, small (80MB), good quality
# 'all-mpnet-base-v2' - Better quality, slightly larger (420MB)
# 'paraphrase-multilingual-MiniLM-L12-v2' - Multilingual support

embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

print("‚úÖ Embedding model loaded successfully!")
print(f"   Model: all-MiniLM-L6-v2")
print(f"   Embedding dimension: {embedding_model.get_sentence_embedding_dimension()}")
print(f"   Max sequence length: {embedding_model.max_seq_length}")
print("\nüí° This model runs 100% locally - no internet or API needed!")

Loading free embedding model...
(First run will download the model - about 90MB)



Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 103/103 [00:00<00:00, 919.24it/s, Materializing param=pooler.dense.weight]                              
BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


‚úÖ Embedding model loaded successfully!
   Model: all-MiniLM-L6-v2
   Embedding dimension: 384
   Max sequence length: 256

üí° This model runs 100% locally - no internet or API needed!


In [4]:
# Quick test
test_sentences = [
    "Machine learning is a subset of artificial intelligence",
    "Deep learning uses neural networks with multiple layers",
    "I love eating pizza for dinner"
]

print("Testing embeddings...\n")
embeddings = embedding_model.encode(test_sentences)

print(f"Generated {len(embeddings)} embeddings")
print(f"Embedding shape: {embeddings[0].shape}")

# Calculate similarity
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

print("\nSimilarity scores:")
print(f"  Sentence 1 vs 2 (both about AI): {cosine_similarity(embeddings[0], embeddings[1]):.3f}")
print(f"  Sentence 1 vs 3 (different topics): {cosine_similarity(embeddings[0], embeddings[2]):.3f}")
print("\n‚úÖ Embeddings working correctly!")

Testing embeddings...

Generated 3 embeddings
Embedding shape: (384,)

Similarity scores:
  Sentence 1 vs 2 (both about AI): 0.403
  Sentence 1 vs 3 (different topics): 0.071

‚úÖ Embeddings working correctly!


In [5]:
class PDFContentExtractor:
    """
    Extract text and tables from PDFs - completely free!
    """
    
    def __init__(self, pdf_path: str):
        self.pdf_path = pdf_path
        self.pages_content = []
        self.extracted_tables = []
    
    def extract_all(self) -> Dict:
        """Extract all content from PDF"""
        print(f"\nüìÑ Processing: {Path(self.pdf_path).name}")
        
        with pdfplumber.open(self.pdf_path) as pdf:
            total_pages = len(pdf.pages)
            
            for page_num, page in enumerate(pdf.pages, 1):
                print(f"   Processing page {page_num}/{total_pages}...", end='\r')
                
                page_data = {
                    'page_number': page_num,
                    'text': '',
                    'tables': []
                }
                
                # Extract text
                text = page.extract_text()
                if text:
                    page_data['text'] = text.strip()
                
                # Extract tables
                tables = page.extract_tables()
                for table_idx, table in enumerate(tables):
                    if table and len(table) > 0:
                        table_text = self._format_table(table, page_num, table_idx)
                        page_data['tables'].append(table_text)
                        self.extracted_tables.append({
                            'page': page_num,
                            'index': table_idx,
                            'text': table_text
                        })
                
                self.pages_content.append(page_data)
        
        print(f"\n‚úÖ Extracted: {len(self.pages_content)} pages, {len(self.extracted_tables)} tables")
        return self._compile_results()
    
    def _format_table(self, table: List[List], page_num: int, table_idx: int) -> str:
        """Convert table to readable text"""
        lines = [f"\n[Table {table_idx + 1} on Page {page_num}]"]
        
        for row in table:
            cleaned_row = [str(cell).strip() if cell else "" for cell in row]
            lines.append(" | ".join(cleaned_row))
        
        return "\n".join(lines)
    
    def _compile_results(self) -> Dict:
        return {
            'pages': self.pages_content,
            'total_pages': len(self.pages_content),
            'total_tables': len(self.extracted_tables)
        }
    
    def create_documents(self) -> List[Dict]:
        """Create document chunks"""
        documents = []
        
        for page_data in self.pages_content:
            content_parts = []
            
            if page_data['text']:
                content_parts.append(page_data['text'])
            
            if page_data['tables']:
                content_parts.extend(page_data['tables'])
            
            if content_parts:
                documents.append({
                    'content': "\n\n".join(content_parts),
                    'metadata': {
                        'source': self.pdf_path,
                        'page': page_data['page_number'],
                        'has_tables': len(page_data['tables']) > 0
                    }
                })
        
        return documents

print("‚úÖ PDF Extractor ready")

‚úÖ PDF Extractor ready


In [8]:
# ============================================
# ADD YOUR PDF FILES HERE
# ============================================

pdf_files = [
    r'C:\Users\ASUS\OneDrive\Desktop\RAG\pdfs\Paradigms_of_Programming.pdf',
    r'C:\Users\ASUS\OneDrive\Desktop\RAG\pdfs\barry.pdf',

]

# ============================================

all_documents = []

if pdf_files:
    print("\n" + "="*80)
    print("PROCESSING PDF FILES")
    print("="*80)
    
    for pdf_path in pdf_files:
        if not os.path.exists(pdf_path):
            print(f"\n‚ö†Ô∏è  File not found: {pdf_path}")
            continue
        
        try:
            extractor = PDFContentExtractor(pdf_path)
            results = extractor.extract_all()
            docs = extractor.create_documents()
            all_documents.extend(docs)
            print(f"   ‚úÖ Created {len(docs)} document chunks\n")
        except Exception as e:
            print(f"\n   ‚ùå Error processing {pdf_path}: {e}\n")
else:
    print("\n" + "="*80)
    print("‚ö†Ô∏è  NO PDF FILES SPECIFIED - Using Sample Documents")
    print("="*80)
    print("\nTo use your own PDFs, add file paths to the 'pdf_files' list above.")
    print("Example: pdf_files = ['/path/to/your/document.pdf']\n")
    
    # Sample documents for testing
    sample_texts = [
        """Machine Learning is a branch of artificial intelligence that focuses on building systems 
        that can learn from data. It includes various approaches such as supervised learning, where 
        models learn from labeled data, unsupervised learning, where patterns are discovered in 
        unlabeled data, and reinforcement learning, where agents learn through interaction with 
        an environment.""",
        
        """Deep Learning is a subset of machine learning that uses artificial neural networks with 
        multiple layers. These deep neural networks can automatically learn hierarchical 
        representations of data. Deep learning has achieved breakthrough results in computer vision, 
        natural language processing, and speech recognition tasks.""",
        
        """Natural Language Processing (NLP) is a field of AI that focuses on enabling computers to 
        understand, interpret, and generate human language. Modern NLP uses transformer models like 
        BERT, GPT, and T5 for tasks such as text classification, machine translation, question 
        answering, and text generation.""",
        
        """Computer Vision enables machines to interpret and understand visual information from the 
        world. Applications include image classification, object detection, semantic segmentation, 
        and facial recognition. Convolutional Neural Networks (CNNs) are the primary architecture 
        used in computer vision tasks.""",
        
        """Reinforcement Learning (RL) is a machine learning paradigm where an agent learns to make 
        decisions by interacting with an environment. The agent receives rewards or penalties based 
        on its actions and learns to maximize cumulative reward. RL has been successfully applied 
        in game playing, robotics, and autonomous systems.""",
    ]
    
    all_documents = [
        {
            'content': text,
            'metadata': {'source': 'sample', 'page': i, 'has_tables': False}
        }
        for i, text in enumerate(sample_texts, 1)
    ]
    
    print(f"‚úÖ Loaded {len(all_documents)} sample documents\n")

print("="*80)
print(f"üìä TOTAL DOCUMENTS: {len(all_documents)}")
print("="*80)


PROCESSING PDF FILES

üìÑ Processing: Paradigms_of_Programming.pdf
   Processing page 46/46...
‚úÖ Extracted: 46 pages, 46 tables
   ‚úÖ Created 46 document chunks


üìÑ Processing: barry.pdf
   Processing page 210/210...
‚úÖ Extracted: 210 pages, 0 tables
   ‚úÖ Created 210 document chunks

üìä TOTAL DOCUMENTS: 256


In [9]:
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
    """
    Split text into overlapping chunks
    """
    words = text.split()
    chunks = []
    
    for i in range(0, len(words), chunk_size - overlap):
        chunk = ' '.join(words[i:i + chunk_size])
        if chunk:
            chunks.append(chunk)
    
    return chunks if chunks else [text]

# Chunk all documents
print("\nChunking documents...")

chunked_documents = []
for doc in all_documents:
    chunks = chunk_text(doc['content'], chunk_size=500, overlap=100)
    
    for chunk in chunks:
        chunked_documents.append({
            'content': chunk,
            'metadata': doc['metadata']
        })

print(f"‚úÖ Created {len(chunked_documents)} chunks from {len(all_documents)} documents")


Chunking documents...
‚úÖ Created 468 chunks from 256 documents


In [10]:
print("\n" + "="*80)
print("CREATING VECTOR STORE")
print("="*80)
print("\nGenerating embeddings for all documents...")
print("(This may take a minute for large documents)\n")

# Initialize ChromaDB
chroma_client = chromadb.PersistentClient(path="./chroma_free_db")

# Delete existing collection if it exists
try:
    chroma_client.delete_collection(name="free_rag_collection")
    print("   Cleared existing collection")
except:
    pass

# Create new collection
collection = chroma_client.create_collection(
    name="free_rag_collection",
    metadata={"hnsw:space": "cosine"}
)

# Extract data
texts = [doc['content'] for doc in chunked_documents]
metadatas = [doc['metadata'] for doc in chunked_documents]
ids = [f"doc_{i}" for i in range(len(chunked_documents))]

# Generate embeddings (FREE - runs locally)
print(f"   Encoding {len(texts)} text chunks...")
embeddings = embedding_model.encode(
    texts,
    show_progress_bar=True,
    batch_size=32
)

# Add to ChromaDB
print("\n   Adding to vector store...")
collection.add(
    embeddings=embeddings.tolist(),
    documents=texts,
    metadatas=metadatas,
    ids=ids
)

print("\n" + "="*80)
print(f"‚úÖ VECTOR STORE CREATED!")
print(f"   Location: ./chroma_free_db")
print(f"   Documents: {len(texts)}")
print(f"   Embedding dimension: {embeddings.shape[1]}")
print("="*80)


CREATING VECTOR STORE

Generating embeddings for all documents...
(This may take a minute for large documents)

   Encoding 468 text chunks...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15/15 [00:13<00:00,  1.09it/s]



   Adding to vector store...

‚úÖ VECTOR STORE CREATED!
   Location: ./chroma_free_db
   Documents: 468
   Embedding dimension: 384


In [11]:
def mmr_retrieval(query: str, k: int = 5, lambda_param: float = 0.5, fetch_k: int = 20):
    """
    Maximal Marginal Relevance retrieval for diverse results
    
    Args:
        query: Search query
        k: Number of documents to return
        lambda_param: Balance between relevance (1.0) and diversity (0.0)
        fetch_k: Initial candidates to fetch before MMR reranking
    """
    # Encode query
    query_embedding = embedding_model.encode(query)
    
    # Get initial candidates
    results = collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results=min(fetch_k, collection.count()),
        include=['embeddings', 'documents', 'metadatas', 'distances']
    )
    
    if not results['documents'][0]:
        return []
    
    # Extract data
    candidate_docs = results['documents'][0]
    candidate_embeddings = np.array(results['embeddings'][0])
    candidate_metadatas = results['metadatas'][0]
    candidate_distances = results['distances'][0]
    
    # Convert distances to similarity scores (ChromaDB uses L2 distance)
    # For cosine similarity from L2: similarity = 1 - (distance^2 / 2)
    relevance_scores = [1 - (dist**2 / 2) for dist in candidate_distances]
    
    # MMR algorithm
    selected_indices = []
    selected_docs = []
    
    # Select first document (most relevant)
    first_idx = np.argmax(relevance_scores)
    selected_indices.append(first_idx)
    selected_docs.append({
        'content': candidate_docs[first_idx],
        'metadata': candidate_metadatas[first_idx],
        'relevance': relevance_scores[first_idx]
    })
    
    # Select remaining documents
    while len(selected_indices) < min(k, len(candidate_docs)):
        mmr_scores = []
        
        for i in range(len(candidate_docs)):
            if i in selected_indices:
                mmr_scores.append(-float('inf'))
                continue
            
            # Calculate MMR score
            relevance = relevance_scores[i]
            
            # Max similarity to already selected documents
            similarities = [
                np.dot(candidate_embeddings[i], candidate_embeddings[j]) / 
                (np.linalg.norm(candidate_embeddings[i]) * np.linalg.norm(candidate_embeddings[j]))
                for j in selected_indices
            ]
            max_similarity = max(similarities)
            
            # MMR formula: Œª * Relevance - (1-Œª) * MaxSimilarity
            mmr_score = lambda_param * relevance - (1 - lambda_param) * max_similarity
            mmr_scores.append(mmr_score)
        
        # Select next document
        next_idx = np.argmax(mmr_scores)
        selected_indices.append(next_idx)
        selected_docs.append({
            'content': candidate_docs[next_idx],
            'metadata': candidate_metadatas[next_idx],
            'relevance': relevance_scores[next_idx],
            'mmr_score': mmr_scores[next_idx]
        })
    
    return selected_docs

print("‚úÖ MMR retrieval function ready")

‚úÖ MMR retrieval function ready


In [12]:
print("\n" + "="*80)
print("LOADING FREE LOCAL LLM")
print("="*80)
print("\nüîÑ Loading model... (First time will download ~2-4GB)")
print("   This may take a few minutes on first run.\n")

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Using TinyLlama - small, fast, free
# Alternative models:
# - "microsoft/phi-2" (better quality, needs more RAM)
# - "stabilityai/stablelm-2-zephyr-1_6b" (good balance)

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        low_cpu_mem_usage=True
    )
    
    # Create pipeline
    llm_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1
    )
    
    print("\n" + "="*80)
    print("‚úÖ LOCAL LLM LOADED SUCCESSFULLY!")
    print(f"   Model: {model_name}")
    print(f"   Device: {'GPU' if torch.cuda.is_available() else 'CPU'}")
    print("   Status: 100% Free, No API, Runs Offline")
    print("="*80)
    
    def generate_answer(prompt: str) -> str:
        """Generate answer using local LLM"""
        response = llm_pipeline(prompt)
        # Extract only the new generated text
        full_text = response[0]['generated_text']
        answer = full_text[len(prompt):].strip()
        return answer
    
except Exception as e:
    print(f"\n‚ùå Error loading model: {e}")
    print("\nüí° Alternative: Install Ollama for better local LLMs")
    print("   Visit: https://ollama.ai")
    
    # Fallback: simple template-based response
    def generate_answer(prompt: str) -> str:
        return "[Model not loaded. Please ensure you have enough RAM/GPU or use Ollama]"


LOADING FREE LOCAL LLM

üîÑ Loading model... (First time will download ~2-4GB)
   This may take a few minutes on first run.



`torch_dtype` is deprecated! Use `dtype` instead!
Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 201/201 [00:02<00:00, 68.66it/s, Materializing param=model.norm.weight]                               
Passing `generation_config` together with generation-related arguments=({'top_p', 'repetition_penalty', 'temperature', 'max_new_tokens', 'do_sample'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.



‚úÖ LOCAL LLM LOADED SUCCESSFULLY!
   Model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
   Device: CPU
   Status: 100% Free, No API, Runs Offline


In [13]:
def query_rag(
    question: str,
    k: int = 4,
    lambda_param: float = 0.5,
    show_sources: bool = True
):
    """
    Query the RAG system - 100% FREE!
    
    Args:
        question: Your question
        k: Number of relevant documents to retrieve
        lambda_param: MMR diversity (0=max diversity, 1=max relevance)
        show_sources: Whether to display source documents
    """
    print("\n" + "="*80)
    print("‚ùì QUESTION")
    print("="*80)
    print(f"{question}")
    print("="*80)
    
    # Step 1: Retrieve relevant documents
    print("\nüîç Retrieving relevant documents using MMR...")
    retrieved_docs = mmr_retrieval(
        query=question,
        k=k,
        lambda_param=lambda_param,
        fetch_k=20
    )
    
    if not retrieved_docs:
        print("‚ùå No relevant documents found!")
        return
    
    print(f"‚úÖ Retrieved {len(retrieved_docs)} relevant documents")
    
    # Step 2: Build context
    context_parts = []
    for i, doc in enumerate(retrieved_docs, 1):
        source = doc['metadata'].get('source', 'Unknown')
        page = doc['metadata'].get('page', '?')
        context_parts.append(
            f"[Document {i} - Source: {Path(source).name}, Page: {page}]\n{doc['content']}"
        )
    
    context = "\n\n" + "-"*80 + "\n\n".join(context_parts)
    
    # Step 3: Create prompt
    prompt = f"""<|system|>
You are a helpful AI assistant. Answer the question based on the provided context.
If the answer is not in the context, say so. Be concise and accurate.
</s>
<|user|>
Context:
{context}

Question: {question}
</s>
<|assistant|>
"""
    
    # Step 4: Generate answer
    print("\nü§ñ Generating answer...\n")
    print("="*80)
    print("üí° ANSWER")
    print("="*80)
    
    try:
        answer = generate_answer(prompt)
        print(answer)
    except Exception as e:
        print(f"‚ùå Error generating answer: {e}")
        return
    
    # Step 5: Show sources
    if show_sources:
        print("\n" + "="*80)
        print("üìö SOURCE DOCUMENTS")
        print("="*80)
        
        for i, doc in enumerate(retrieved_docs, 1):
            print(f"\n[Source {i}]")
            print(f"  üìÑ File: {Path(doc['metadata']['source']).name}")
            print(f"  üìë Page: {doc['metadata']['page']}")
            print(f"  üéØ Relevance: {doc['relevance']:.3f}")
            print(f"\n  Content preview:")
            preview = doc['content'][:250].replace('\n', ' ')
            print(f"  {preview}...")
            print("-" * 80)

print("‚úÖ Query function ready!")

‚úÖ Query function ready!


In [17]:
# Test with a sample question
query_rag(
    question="What did amy do?",
    k=3,
    lambda_param=0.3,
    show_sources=True
)

Both `max_new_tokens` (=512) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



‚ùì QUESTION
What did amy do?

üîç Retrieving relevant documents using MMR...
‚úÖ Retrieved 3 relevant documents

ü§ñ Generating answer...

üí° ANSWER
Amy did not know how to use a computer, which caused Tracy to lose her job as a computer expert at the prison. This led to her becoming a saleswoman in the children's department at Saks Fifth Avenue, where a hysterical customer recognized her as a murderess and discharged her immediately. The unfairness of what happened to her further contributed to her feelings of desperation and resulted in her losing everything she had worked for.

üìö SOURCE DOCUMENTS

[Source 1]
  üìÑ File: barry.pdf
  üìë Page: 46
  üéØ Relevance: 0.876

  Content preview:
  the daytime she had the illusion of freedom. After breakfast in the prison kitchen, she walked over to the warden's cottage and made breakfast for Amy. Tracy had learned a good deal about cooking from Charles, and she was tempted by the varieties of ...
---------------------------------

In [20]:
def interactive_mode():
    """
    Interactive question-answering mode
    """
    print("\n" + "="*80)
    print("üöÄ INTERACTIVE RAG SYSTEM (100% Free!)")
    print("="*80)
    print("\nAsk questions about your documents!")
    print("Commands:")
    print("  - Type your question to get an answer")
    print("  - 'quit' or 'exit' to stop")
    print("  - 'settings' to adjust retrieval parameters")
    print("\n" + "="*80 + "\n")
    
    k = 4
    lambda_param = 0.5
    
    while True:
        question = input("\nüí¨ Your question: ").strip()
        
        if question.lower() in ['quit', 'exit', 'q']:
            print("\nüëã Goodbye!")
            break
        
        if question.lower() == 'settings':
            print("\nCurrent settings:")
            print(f"  k (documents to retrieve): {k}")
            print(f"  lambda (diversity): {lambda_param}")
            
            try:
                new_k = input("\nNew k value (press Enter to keep current): ").strip()
                if new_k:
                    k = int(new_k)
                
                new_lambda = input("New lambda value 0-1 (press Enter to keep current): ").strip()
                if new_lambda:
                    lambda_param = float(new_lambda)
                
                print(f"\n‚úÖ Settings updated: k={k}, lambda={lambda_param}")
            except:
                print("‚ùå Invalid input. Settings unchanged.")
            continue
        
        if not question:
            continue
        
        try:
            query_rag(question, k=k, lambda_param=lambda_param, show_sources=True)
        except Exception as e:
            print(f"\n‚ùå Error: {str(e)}")
            print("Please try rephrasing your question.")

# Uncomment to start interactive mode:
# interactive_mode()

In [22]:
interactive_mode()


üöÄ INTERACTIVE RAG SYSTEM (100% Free!)

Ask questions about your documents!
Commands:
  - Type your question to get an answer
  - 'quit' or 'exit' to stop
  - 'settings' to adjust retrieval parameters




Both `max_new_tokens` (=512) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



‚ùì QUESTION
who is author of the book

üîç Retrieving relevant documents using MMR...
‚úÖ Retrieved 4 relevant documents

ü§ñ Generating answer...

üí° ANSWER
The author of the book mentioned in the given context is Barry Michels.

üìö SOURCE DOCUMENTS

[Source 1]
  üìÑ File: barry.pdf
  üìë Page: 107
  üéØ Relevance: 0.800

  Content preview:
  107 BOOK THREE Chapter 20 It's time to begin my new life, Tracy decided. But what kind of life? I've gone from an innocent, naive victim to a... what? A thief--- that's what. She thought of Joe Romano and Anthony Orsatti and Perry Pope and Judge Lawr...
--------------------------------------------------------------------------------

[Source 2]
  üìÑ File: Paradigms_of_Programming.pdf
  üìë Page: 2
  üéØ Relevance: 0.742

  Content preview:
  GettingStarted Black-BoxAbstraction Foodforthought Must Read textbooks:- SICP (pdf available online) 2/28 [Table 1 on Page 2] GettingStarted Black-BoxAbstraction Foodforthought | Must Read text

In [25]:
def compare_retrieval_methods(query: str, k: int = 5):
    """
    Compare standard similarity search vs MMR retrieval
    """
    print("\n" + "="*80)
    print("üî¨ RETRIEVAL METHOD COMPARISON")
    print("="*80)
    print(f"Query: {query}\n")
    
    # Get query embedding
    query_emb = embedding_model.encode(query)
    
    # Method 1: Standard similarity search
    print("\n1Ô∏è‚É£ STANDARD SIMILARITY SEARCH")
    print("-" * 80)
    standard_results = collection.query(
        query_embeddings=[query_emb.tolist()],
        n_results=k
    )
    
    for i, (doc, dist) in enumerate(zip(standard_results['documents'][0], standard_results['distances'][0]), 1):
        print(f"\n[{i}] Similarity: {1 - (dist**2/2):.3f}")
        print(f"    {doc[:120]}...")
    
    # Method 2: MMR (Balanced)
    print("\n\n2Ô∏è‚É£ MMR RETRIEVAL (Œª=0.5, Balanced)")
    print("-" * 80)
    mmr_balanced = mmr_retrieval(query, k=k, lambda_param=0.5)
    
    for i, doc in enumerate(mmr_balanced, 1):
        print(f"\n[{i}] Relevance: {doc['relevance']:.3f}")
        print(f"    {doc['content'][:120]}...")
    
    # Method 3: MMR (High Diversity)
    print("\n\n3Ô∏è‚É£ MMR RETRIEVAL (Œª=0.1, High Diversity)")
    print("-" * 80)
    mmr_diverse = mmr_retrieval(query, k=k, lambda_param=0.1)
    
    for i, doc in enumerate(mmr_diverse, 1):
        print(f"\n[{i}] Relevance: {doc['relevance']:.3f}")
        print(f"    {doc['content'][:120]}...")
    
    print("\n" + "="*80)

# Example:
compare_retrieval_methods("What was tracy upto?", k=4)


üî¨ RETRIEVAL METHOD COMPARISON
Query: What was tracy upto?


1Ô∏è‚É£ STANDARD SIMILARITY SEARCH
--------------------------------------------------------------------------------

[1] Similarity: 0.899
    109 She had dinner in her cabin. As she ate, she wondered what ill fate had placed Jeff Stevens in her path again. She w...

[2] Similarity: 0.892
    looked at her in surprise. "Come on. You mean you really don't know?" "Know what?" "Max Pierpont is one of the richest m...

[3] Similarity: 0.880
    89 Tracy felt her heart twisting in agony. She remembered the airport in New Orleans when they had handcuffed her, the s...

[4] Similarity: 0.877
    waited until dark before she set out. The parades had moved on to Chartres Street, and in the distance Tracy could hear ...


2Ô∏è‚É£ MMR RETRIEVAL (Œª=0.5, Balanced)
--------------------------------------------------------------------------------

[1] Relevance: 0.899
    109 She had dinner in her cabin. As she ate, she wondered what ill