# Session 11: Hybrid Architecture Design II
## Integration Patterns

**Production LLM Deployment: Risk Characterization Before Failure**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Javihaus/Production_LLM_Deployment/blob/main/sessions/session_11_hybrid_architecture_ii/notebook.ipynb)

---

**Learning Objectives:**
1. Implement LLM + retrieval (RAG) systems
2. Build LLM + constraint propagation hybrids
3. Create LLM + verification modules
4. Test and validate hybrid systems

In [None]:
!pip install -q anthropic numpy pandas matplotlib

import json
import re
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Dict, Optional

import anthropic
import numpy as np
import pandas as pd

# Resolve the API key: try Colab's secret store first, otherwise fall back
# to the ANTHROPIC_API_KEY environment variable.
try:
    from google.colab import userdata
    api_key = userdata.get('ANTHROPIC_API_KEY')
except Exception:
    # Best-effort fallback: the Colab import or the secret lookup failed.
    # `Exception` (not a bare `except`) so Ctrl-C and SystemExit still
    # propagate instead of being silently turned into a fallback.
    import os
    api_key = os.environ.get('ANTHROPIC_API_KEY')

# Shared client used by every example below.
client = anthropic.Anthropic(api_key=api_key)
print("Setup complete!")

## Part 1: RAG Implementation Pattern

In [None]:
class SimpleRAG:
    """Minimal retrieval-augmented generation (RAG) pipeline for demonstration.

    Documents live in an in-memory list and are ranked by keyword overlap
    with the query. The top-ranked documents are then handed to the LLM as
    the only context it is allowed to answer from.

    Args:
        client: Object exposing ``messages.create(...)`` (the Anthropic client
            in this notebook).
        model: Model identifier sent with every generation request.
        max_tokens: Upper bound on generated tokens per answer.
    """

    # Runs of word characters; tokenizing with this keeps punctuation out of
    # the keywords, so "ibuprofen?" and "ibuprofen" count as the same word.
    _WORD_PATTERN = re.compile(r"\w+")

    def __init__(self, client, model: str = "claude-sonnet-4-5-20250929",
                 max_tokens: int = 300):
        self.client = client
        self.model = model
        self.max_tokens = max_tokens
        self.documents = []

    @classmethod
    def _keywords(cls, text: str) -> set:
        """Return the set of lower-cased words in *text*, punctuation removed."""
        return set(cls._WORD_PATTERN.findall(text.lower()))

    def add_documents(self, docs: List[Dict]):
        """Append documents to the knowledge base.

        Each document is a dict; ``retrieve`` reads its ``"content"`` key and
        ``generate`` additionally reads its ``"id"`` key.
        """
        self.documents.extend(docs)

    def retrieve(self, query: str, top_k: int = 3) -> List[Dict]:
        """Rank documents by the number of words they share with *query*.

        Args:
            query: Free-text question.
            top_k: Maximum number of documents to return; values below zero
                are treated as zero.

        Returns:
            Up to ``top_k`` documents, best match first. Ties keep insertion
            order, and documents with no shared word are still eligible
            (they simply rank last).
        """
        query_words = self._keywords(query)
        scored = [
            (len(query_words & self._keywords(doc["content"])), doc)
            for doc in self.documents
        ]
        # list.sort is stable, also with reverse=True, so equal scores keep
        # the order in which the documents were added.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        # Clamp: a negative top_k would otherwise slice from the wrong end.
        return [doc for _, doc in scored[:max(top_k, 0)]]

    def generate(self, query: str, top_k: int = 3) -> Dict:
        """Run the RAG pipeline: retrieve context, then ask the LLM.

        Args:
            query: Question to answer.
            top_k: Number of documents to pass to the model as context.

        Returns:
            Dict with ``"query"`` (the input), ``"answer"`` (text of the first
            content block of the response) and ``"sources"`` (ids of the
            retrieved documents, in ranking order).
        """
        # Step 1: Retrieve relevant documents
        retrieved = self.retrieve(query, top_k)
        context = "\n".join(f"- {d['content']}" for d in retrieved)

        # Step 2: Generate with context
        prompt = f"""Answer based ONLY on the provided context. 
If the context doesn't contain the answer, say "Information not available."

Context:
{context}

Question: {query}

Answer:"""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            messages=[{"role": "user", "content": prompt}]
        )

        return {
            "query": query,
            "answer": response.content[0].text,
            "sources": [d["id"] for d in retrieved]
        }


# Demo: load a three-document medication knowledge base and ask one question
rag = SimpleRAG(client)
rag.add_documents([
    dict(id="1", content="Aspirin should not be taken within 4 hours of ibuprofen due to NSAID interactions."),
    dict(id="2", content="Acetaminophen (Tylenol) is safe to take with NSAIDs."),
    dict(id="3", content="Blood pressure medications should be taken at consistent times daily."),
])

result = rag.generate("Can I take aspirin and ibuprofen together?")
# One print call, one field per line.
print(
    f"Question: {result['query']}",
    f"Answer: {result['answer']}",
    f"Sources: {result['sources']}",
    sep="\n",
)

## Part 2: Constraint Solver Integration

In [None]:
class ScheduleConstraintSolver:
    """Simple deterministic solver for single-day scheduling.

    Events are dicts with ``"name"``, ``"start"`` and ``"end"`` keys, where
    times are minutes from midnight. Intervals are treated as half-open
    ``[start, end)``, so back-to-back events do not conflict.
    """

    def __init__(self):
        self.events = []

    def add_event(self, name: str, start: int, end: int):
        """Add an event (times in minutes from midnight)."""
        self.events.append({"name": name, "start": start, "end": end})

    def check_conflicts(self) -> List[str]:
        """Return one message per pair of overlapping events.

        Each unordered pair is compared once, in insertion order.
        """
        conflicts = []
        for i, e1 in enumerate(self.events):
            for e2 in self.events[i+1:]:
                if self._overlaps(e1, e2):
                    conflicts.append(f"{e1['name']} conflicts with {e2['name']}")
        return conflicts

    def _overlaps(self, e1: Dict, e2: Dict) -> bool:
        """Return True when the two half-open intervals share any time."""
        return e1["start"] < e2["end"] and e2["start"] < e1["end"]

    def find_slot(self, duration: int, earliest: int = 480, latest: int = 1080) -> Optional[int]:
        """Find the earliest free slot of *duration* minutes.

        Args:
            duration: Required length in minutes (expected to be positive).
            earliest: Earliest allowed start, minutes from midnight
                (default 480 = 08:00).
            latest: Latest allowed end, minutes from midnight
                (default 1080 = 18:00).

        Returns:
            The start minute of the first slot that lies entirely inside
            ``[earliest, latest]`` and overlaps no existing event, or ``None``
            if there is no such slot.
        """
        # Sweep the events in start order. `cursor` is the earliest minute
        # that is both >= earliest and past every event seen so far.
        cursor = earliest
        for event in sorted(self.events, key=lambda e: e["start"]):
            if event["start"] - cursor >= duration:
                # The gap before this event is big enough; later events start
                # even later, so they cannot intrude either.
                break
            # max() rather than plain assignment: an event nested inside a
            # longer earlier one must not pull the cursor backwards.
            cursor = max(cursor, event["end"])

        # The candidate must also finish inside the allowed window.
        if cursor + duration <= latest:
            return cursor
        return None


class HybridScheduler:
    """Hybrid scheduler: the LLM parses the request, the solver places it.

    The LLM is used only to turn free text into ``{"title", "duration_minutes"}``;
    finding a free slot is delegated to a ``ScheduleConstraintSolver``.

    Args:
        client: Object exposing ``messages.create(...)`` (the Anthropic client
            in this notebook).
        model: Model identifier sent with every extraction request.
    """

    def __init__(self, client, model: str = "claude-sonnet-4-5-20250929"):
        self.client = client
        self.model = model
        self.solver = ScheduleConstraintSolver()

    @staticmethod
    def _parse_json_object(text: str) -> Optional[Dict]:
        """Return the outermost ``{...}`` span of *text* parsed as JSON, or None.

        LLM output may wrap the JSON in prose, so only the span from the first
        ``{`` to the last ``}`` is parsed. Malformed JSON yields ``None``
        instead of raising.
        """
        start = text.find('{')
        end = text.rfind('}') + 1
        if start < 0 or end <= start:
            return None
        try:
            return json.loads(text[start:end])
        except json.JSONDecodeError:
            return None

    @staticmethod
    def _normalize_duration(value) -> Optional[int]:
        """Return *value* as a positive whole number of minutes, or None.

        Accepts ints and integral floats (JSON may encode 30 as ``30.0``);
        rejects booleans, strings, fractions, and non-positive numbers so the
        solver and the ``HH:MM`` formatting only ever see integers.
        """
        if isinstance(value, bool):
            # bool is a subclass of int; True must not become "1 minute".
            return None
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        if isinstance(value, int) and value > 0:
            return value
        return None

    def extract_schedule_request(self, text: str) -> Optional[Dict]:
        """Use the LLM to extract scheduling info from free text.

        Returns:
            The parsed JSON object from the model's reply, or ``None`` when
            the reply contains no parseable JSON object. Field presence and
            types are not checked here.
        """
        prompt = f"""Extract the meeting request details from this text.
Return JSON with: {{"title": string, "duration_minutes": number}}

Text: {text}

JSON:"""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=100,
            messages=[{"role": "user", "content": prompt}]
        )

        return self._parse_json_object(response.content[0].text)

    def schedule(self, request: str) -> Dict:
        """Run the full hybrid pipeline for one natural-language request.

        Returns:
            On success: ``{"success": True, "title", "time" (HH:MM),
            "duration"}``, and the event is added to the solver. On failure:
            ``{"success": False, "error": <message>}`` and the schedule is
            left unchanged.
        """
        # Step 1: LLM extracts info
        extracted = self.extract_schedule_request(request)
        if not extracted:
            return {"success": False, "error": "Could not extract request"}

        # LLM output is untrusted: validate the fields before using them.
        title = extracted.get("title")
        duration = self._normalize_duration(extracted.get("duration_minutes"))
        if not isinstance(title, str) or not title.strip() or duration is None:
            return {"success": False, "error": "Could not extract request"}

        # Step 2: Solver finds slot
        slot = self.solver.find_slot(duration)
        if slot is None:
            return {"success": False, "error": "No available slot"}

        # Step 3: Add to schedule
        self.solver.add_event(title, slot, slot + duration)

        hours, mins = divmod(slot, 60)
        return {
            "success": True,
            "title": title,
            "time": f"{hours:02d}:{mins:02d}",
            "duration": duration
        }


# Demo: pre-book 10:00-11:00, then let the hybrid pipeline place a new meeting
scheduler = HybridScheduler(client)
scheduler.solver.add_event(name="Existing Meeting", start=10 * 60, end=11 * 60)

result = scheduler.schedule("Schedule a 30-minute team standup")
print(f"Scheduling result: {result}")

## Part 3: Testing Hybrid vs LLM-Only

In [None]:
def compare_approaches(scenarios: List[Dict], client) -> pd.DataFrame:
    """Compare hybrid vs LLM-only approaches."""
    results = []
    
    for scenario in scenarios:
        # LLM-only approach
        response = client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=50,
            messages=[{"role": "user", "content": scenario["prompt"]}]
        )
        llm_answer = response.content[0].text.strip().upper()
        llm_correct = scenario["expected"] in llm_answer[:10]
        
        results.append({
            "scenario": scenario["name"],
            "expected": scenario["expected"],
            "llm_only": "CORRECT" if llm_correct else "WRONG",
            "hybrid": "CORRECT"  # Assume hybrid uses deterministic checker
        })
        time.sleep(0.3)
    
    return pd.DataFrame(results)


# Demo scenarios: one safe and one unsafe medication gap, with expected labels
test_scenarios = [
    dict(
        name="Safe timing",
        prompt="Med A at 8AM, Med B at 1PM, 4hr gap needed. Safe? YES/NO",
        expected="YES",
    ),
    dict(
        name="Unsafe timing",
        prompt="Med A at 8AM, Med B at 11AM, 4hr gap needed. Safe? YES/NO",
        expected="NO",
    ),
]

comparison = compare_approaches(test_scenarios, client)
# Heading and table in one call; output is the same as two separate prints.
print("\nApproach Comparison:", comparison.to_string(index=False), sep="\n")

## Key Takeaways

1. **RAG grounds responses in facts.** Reduces hallucination on knowledge tasks.

2. **Constraint solvers guarantee correctness.** Use for scheduling, timing, resource allocation.

3. **The LLM handles natural-language understanding (NLU); the symbolic solver handles logic.** Clear division of labor.

4. **Always test hybrid vs. baseline.** Measure the improvement on the same scenarios rather than assuming it.

---

**Next Session:** Risk Documentation and Production Deployment