# RAG Demo - Instructor Version

## From Hallucination to Accuracy

This notebook demonstrates the progression from AI hallucination to accurate responses using Retrieval-Augmented Generation (RAG).

## Setup

In [None]:
import json
import os
from typing import List, Dict, Any

# Try to import OpenAI for semantic search demo
try:
    from openai import OpenAI
    import numpy as np
    OPENAI_AVAILABLE = True
    print("✅ OpenAI available - semantic search demo will use real embeddings")
except ImportError:
    OPENAI_AVAILABLE = False
    print("⚠️  OpenAI not installed - will use mock embeddings for demo")

# Load the product catalog from JSON.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so the file is
# decoded the same way regardless of the platform's default locale encoding.
with open('mock_products.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Catalog entries used by the demos below; the listing loop reads each
# entry's 'name' key, so every entry must provide one.
PRODUCTS = data['products']

print(f"\nLoaded {len(PRODUCTS)} products:")
for p in PRODUCTS:
    print(f"  - {p['name']}")

## Part 1: The Hallucination Problem

Let's see what happens when an AI model answers without access to real product information: it invents plausible-sounding details.

In [None]:
def simulate_ai_without_rag(query: str) -> str:
    """Return a canned, deliberately fabricated answer to mimic hallucination.

    Only queries containing the exact, case-sensitive text "TurboCache" get
    the invented feature list; every other query gets a fixed
    "no information" reply.
    """
    if "TurboCache" not in query:
        return "I don't have information about that product."

    # The leading and trailing empty entries reproduce the newline that opens
    # and closes the response text.
    fabricated_lines = (
        "",
        "Based on the name, TurboCache Pro likely includes:",
        "- Advanced ML-based caching algorithms",
        "- Automatic scaling to 100TB",
        "- Built-in blockchain verification  ",  # trailing spaces are part of the text
        "- Quantum-resistant encryption",
        "- GraphQL API support",
        "- Free tier with 5GB storage",
        "",
        "Price: Probably starts at $49/month",
        "",
        "⚠️ Note: These features are COMPLETELY MADE UP!",
        "The AI is hallucinating plausible-sounding features.",
        "",
    )
    return "\n".join(fabricated_lines)

# Demonstrate hallucination: print the query, a banner, then the canned
# answer produced without any real product data.
query = "Tell me about TurboCache Pro features"
print(
    f"Query: {query}",
    "\n" + "=" * 50,
    "AI Response WITHOUT Real Data:",
    "=" * 50,
    sep="\n",
)
print(simulate_ai_without_rag(query))

## Part 2: Keyword-Based RAG

Now let's implement simple keyword search to retrieve real product information.

In [None]:
def search_products_keyword(query: str, products: List[Dict]) -> List[Dict]:
    """Rank products by how many query words appear in their keywords.

    Args:
        query: Free-text search string. It is lower-cased, split on
            whitespace, and each word has surrounding punctuation trimmed, so
            "Cache?" matches the keyword "cache".
        products: Product dicts. Each may carry a 'keywords' entry; a product
            without one simply never matches.

    Returns:
        The matching products, best match first. Products with the same
        number of matching words keep their original relative order.
    """
    # Punctuation trimmed from the ends of each query word only; inner
    # characters such as the hyphen in "high-speed" are left untouched.
    edge_punctuation = ".,;:!?\"'()[]{}"
    query_words = [
        word
        for word in (raw.strip(edge_punctuation) for raw in query.lower().split())
        if word  # drop tokens that were punctuation only
    ]

    matches = []
    for product in products:
        keywords = product.get('keywords') or []
        if isinstance(keywords, str):
            # A plain string keeps substring semantics for `in`.
            searchable = keywords.lower()
        else:
            # Compare case-insensitively, since the query is lower-cased.
            searchable = {str(keyword).lower() for keyword in keywords}

        match_count = sum(1 for word in query_words if word in searchable)
        if match_count > 0:
            matches.append({
                'product': product,
                'relevance': match_count
            })

    # list.sort is stable, so equally relevant products keep catalog order.
    matches.sort(key=lambda x: x['relevance'], reverse=True)
    return [m['product'] for m in matches]

# Test keyword search: run a few sample queries and list the product names
# each one retrieves.
test_queries = [
    "fast cache performance",
    "security encryption",
    "cloud backup",
    "quantum computing"  # Should find nothing
]

for query in test_queries:
    results = search_products_keyword(query, PRODUCTS)
    # An empty name list is falsy, so `or` falls back to the placeholder text.
    print(
        f"Query: '{query}'",
        f"Results: {[hit['name'] for hit in results] or 'No matches'}",
        "",
        sep="\n",
    )

In [None]:
def create_rag_response(query: str, products: List[Dict]) -> str:
    """Build an answer for `query` from the best keyword-search hit.

    Returns a fixed "no matching products" message when the search finds
    nothing. Otherwise returns a formatted summary of the top-ranked
    product's name, description, features and price.
    """
    hits = search_products_keyword(query, products)
    if not hits:
        return "No matching products found in our catalog."

    # Only the highest-ranked product is used for the answer.
    top_hit = hits[0]

    # The empty first and last entries give the response its leading and
    # trailing newline.
    lines = [
        "",
        "Based on our product catalog:",
        "",
        f"Product: {top_hit['name']}",
        f"Description: {top_hit['description']}",
        "",
        "Features:",
        "\n".join(f"- {feature}" for feature in top_hit['features']),
        "",
        f"Price: {top_hit['price']}",
        "",
        "✅ This information is ACCURATE - retrieved from real data!",
        "",
    ]
    return "\n".join(lines)

# Compare with/without RAG: the same kind of question, now answered from the
# product catalog.
query = "tell me about turbocache pro"
print(
    f"Query: {query}",
    "\n" + "=" * 50,
    "AI Response WITH RAG:",
    "=" * 50,
    sep="\n",
)
print(create_rag_response(query, PRODUCTS))

## Part 3: Semantic Search (Advanced)

Semantic search finds related concepts even without exact keyword matches.

In [None]:
if OPENAI_AVAILABLE:
    # Real embeddings demo. No credentials are passed explicitly, so the
    # client has to pick them up from its own default configuration.
    client = OpenAI()

    def get_embedding(text: str):
        """Return the embedding of `text` from the text-embedding-3-small model."""
        response = client.embeddings.create(
            input=text,
            model="text-embedding-3-small"
        )
        # Only one input was sent, so only the first result is read.
        return response.data[0].embedding

    def cosine_similarity(a, b):
        """Return the cosine similarity of vectors `a` and `b`.

        Returns 0.0 when either vector has zero length, instead of dividing
        by zero and producing NaN (which would make the ranking unreliable).
        """
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    # Generate embeddings for products once, up front, and store each one on
    # its product dict so searches only need to embed the query.
    print("Generating embeddings for products...")
    for product in PRODUCTS:
        text = f"{product['name']} {product['description']}"
        product['embedding'] = get_embedding(text)
    print("✅ Embeddings generated")

    def semantic_search(query: str, products: List[Dict], top_k: int = 2) -> List[Dict]:
        """Return the `top_k` products most similar to `query`.

        Each product must already carry an 'embedding' entry. Products are
        ranked by cosine similarity to the query embedding, best first.
        """
        query_embedding = get_embedding(query)

        similarities = [
            (product, cosine_similarity(query_embedding, product['embedding']))
            for product in products
        ]

        similarities.sort(key=lambda x: x[1], reverse=True)
        return [s[0] for s in similarities[:top_k]]

else:
    # Mock semantic search
    def semantic_search(query: str, products: List[Dict], top_k: int = 2) -> List[Dict]:
        """Mock semantic search for demo.

        Maps a few hard-coded trigger phrases to product names. The first
        phrase found in the lower-cased query decides the result; at most
        `top_k` products with that name are returned, or an empty list when
        no phrase matches.
        """
        # Simulate semantic understanding
        mock_mappings = {
            "high-speed": "TurboCache Pro",
            "protect": "SecureVault Enterprise",
            "team": "CloudSync Manager",
            "insights": "DataFlow Analytics"
        }

        lowered_query = query.lower()
        for key, product_name in mock_mappings.items():
            if key in lowered_query:
                # Honour top_k so the mock has the same contract as the real search.
                return [p for p in products if p['name'] == product_name][:top_k]
        return []

# Compare keyword vs semantic search on queries phrased without the catalog's
# literal keywords.
semantic_queries = [
    "high-speed data access",  # No "fast" keyword
    "protect sensitive information",  # No "security" keyword
    "team collaboration tools"  # Inferred meaning
]

for query in semantic_queries:
    print(f"\nQuery: '{query}'", "-" * 40, sep="\n")

    # Keyword search (an empty name list is falsy, so `or` supplies the placeholder)
    keyword_results = search_products_keyword(query, PRODUCTS)
    print(f"Keyword search: {[hit['name'] for hit in keyword_results] or '❌ No matches'}")

    # Semantic search
    semantic_results = semantic_search(query, PRODUCTS)
    print(f"Semantic search: {[hit['name'] for hit in semantic_results] or 'No matches'}")

    # Highlight the cases where only the semantic search found something.
    if not keyword_results and semantic_results:
        print("✅ Semantic search found meaning without exact keywords!")

## Part 4: Cost & Performance Analysis

In [None]:
# Cost comparison
print("RAG Cost Analysis (per 1000 queries):")
print("=" * 50)

# One row per approach: (method, cost, speed, accuracy)
cost_data = [
    ("Keyword Search", "<$0.01", "Instant", "70%"),
    ("Semantic Search", "~$0.05", "100ms", "90%"),
    ("GPT-4 without RAG", "~$0.30", "1-2s", "Variable/Hallucination"),
    ("Fine-tuning", "$500+ upfront", "Instant", "95%")
]

# Shared column layout so the header and the data rows line up.
row_format = "{:<20} {:<15} {:<10} {}"
print(row_format.format("Method", "Cost", "Speed", "Accuracy"))
print("-" * 60)
for method, cost, speed, accuracy in cost_data:
    print(row_format.format(method, cost, speed, accuracy))

print(
    "\n💡 Key Insights:",
    "- RAG is 6x cheaper than raw GPT-4 calls",
    "- Eliminates hallucination risk",
    "- Start with keyword search, upgrade to semantic as needed",
    sep="\n",
)

## Production Considerations

In [None]:
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
    """Split `text` into overlapping chunks of whitespace-separated words.

    Args:
        text: Document to split. Words are whatever `str.split()` yields.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words each chunk repeats from the end of the
            previous one; must satisfy 0 <= overlap < chunk_size.

    Returns:
        The chunks in document order, each with its words joined by single
        spaces. An empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If `chunk_size` is not positive, or `overlap` is negative
            or not smaller than `chunk_size` (the window could not advance,
            or would skip words).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must be non-negative and smaller than chunk_size")

    words = text.split()
    step = chunk_size - overlap  # how far the window advances each time
    chunks = []

    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        # Once a chunk reaches the end of the document, stop: any further
        # chunk would contain only words this one already covers.
        if start + chunk_size >= len(words):
            break

    return chunks

# Example of chunking: build a long synthetic document, split it, and report
# the resulting sizes.
long_doc = "This is a very long document about product features. " * 200
chunks = chunk_text(long_doc, chunk_size=50, overlap=10)

print(
    "Document chunking example:",
    f"- Original: {len(long_doc.split())} words",
    f"- Chunks: {len(chunks)} chunks",
    "- Chunk size: ~50 words with 10 word overlap",
    "\nFirst chunk preview:",
    chunks[0][:100] + "...",  # first 100 characters only
    sep="\n",
)

print("\n📋 Production Checklist:")
checklist = [
    "✅ Handle 'no results' gracefully",
    "✅ Set similarity thresholds",
    "✅ Implement caching for embeddings",
    "✅ Monitor retrieval quality",
    "✅ Add user feedback loop",
    "✅ Version your knowledge base"
]
for item in checklist:
    print("  " + item)

## Summary

### Key Takeaways

1. **RAG greatly reduces hallucinations** by grounding responses in real, retrieved data (answers are only as good as what is retrieved)
2. **Start simple** with keyword search - often sufficient
3. **Semantic search** adds intelligence but costs more
4. **The pattern**: Search (retrieve) → Augment → Generate
5. **Production needs**: Error handling, monitoring, feedback

### When to Use Each Approach

| Scenario | Recommendation |
|----------|---------------|
| Product catalog | Keyword RAG |
| Technical documentation | Semantic RAG |
| Frequently changing data | RAG (not fine-tuning) |
| Domain-specific language | Fine-tuning |
| General knowledge | No RAG needed |