## Lab 5: RAG + Agentic RAG

This lab introduces Retrieval-Augmented Generation (RAG) and Agentic RAG patterns. You'll:
- Build a simple RAG pipeline over curated local travel guides
- Extend it to Agentic RAG where an agent decides when to retrieve
- Add observability with Arize AX (OpenInference/OTel)

Format follows the style of previous labs in this `labs/` folder.

---

### Objectives
- Master Retrieval-Augmented Generation (RAG) techniques
- Build agents that can effectively use external knowledge
- Implement agentic RAG patterns

### Prerequisites
- Python 3.10+
- `OPENAI_API_KEY` for LLM and embeddings
- Optional: `ARIZE_SPACE_ID` and `ARIZE_API_KEY` for tracing to Arize AX

### What you'll build
- A basic RAG retriever using `local_guides.json`
- A minimal Agentic RAG example where the model calls a tool to retrieve
- Optional tracing so you can observe prompts, tool calls, and spans


In [None]:
# Setup: installs, env, and imports
import os
import json
from pathlib import Path
from dotenv import load_dotenv

# Load environment
# If the working directory is not the repository root (named
# 'agent-mastery-course'), assume it sits one level below it.
project_root = Path.cwd()
if project_root.name != 'agent-mastery-course':
    project_root = project_root.parent
load_dotenv(project_root / 'backend' / '.env')

# Paths
LAB_ROOT = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
# Prefer the backend that sits next to the lab folder. When that directory is
# missing (e.g. the notebook was started from the repository root, so
# LAB_ROOT.parent points outside the repo), fall back to the root resolved
# above so the data path agrees with the .env path.
BACKEND_DIR = LAB_ROOT.parent / "backend"
if not BACKEND_DIR.is_dir():
    BACKEND_DIR = project_root / "backend"
LOCAL_GUIDES = BACKEND_DIR / "data" / "local_guides.json"
# Raise explicitly instead of asserting: asserts are stripped under `python -O`.
if not LOCAL_GUIDES.exists():
    raise FileNotFoundError(f"Expected data at {LOCAL_GUIDES}")

print("local_guides.json:", LOCAL_GUIDES)


In [None]:
# Arize AX tracing
from arize.otel import register
from openinference.instrumentation.langchain import LangChainInstrumentor
from openinference.instrumentation.litellm import LiteLLMInstrumentor
from openinference.instrumentation import using_prompt_template


# Configure the Arize AX tracer.
# The Arize credentials are optional for this lab, so tracing is only switched
# on when both are present; otherwise the notebook runs without exporting spans.
arize_space_id = os.getenv("ARIZE_SPACE_ID")
arize_api_key = os.getenv("ARIZE_API_KEY")

tracer_provider = None
if arize_space_id and arize_api_key:
    tracer_provider = register(
        space_id=arize_space_id,
        api_key=arize_api_key,
        project_name="lab5-rag-and-agentic-rag",
    )

    LangChainInstrumentor().instrument(tracer_provider=tracer_provider, include_chains=True, include_agents=True, include_tools=True)
    LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider, skip_dep_check=True)
    print("Tracing initialized: spans will be exported to Arize AX")
else:
    print("Tracing disabled: set ARIZE_SPACE_ID and ARIZE_API_KEY to export spans to Arize AX")


In [None]:
# LLM init
from typing import Optional, List, Dict, Any

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document
from langchain_core.messages import SystemMessage

# Fail fast when the key is absent or empty: the chat model and the embeddings
# both rely on it.
if os.getenv("OPENAI_API_KEY") in (None, ""):
    raise ValueError("Please set OPENAI_API_KEY to run this notebook.")


# Shared chat model used by both the basic and the agentic RAG flows below
llm = ChatOpenAI(temperature=0.2, model="gpt-3.5-turbo")
print("LLM ready")


In [None]:
# Build a tiny retriever over local_guides.json

print("📚 Loading curated travel guides...")
# Load curated docs. Decode explicitly as UTF-8 so parsing does not depend on
# the platform's default text encoding.
raw = json.loads(LOCAL_GUIDES.read_text(encoding="utf-8"))

def _to_doc(row: Dict[str, Any]) -> Optional[Document]:
    """Convert one guide record into a Document.

    Returns None when the record has no (truthy) "description" or "city".
    Otherwise the page content is a three-line text block (city, interests,
    guide) and the metadata carries "city", "interests" and "source".
    """
    city, description = row.get("city"), row.get("description")
    if not (description and city):
        return None

    # A missing or null "interests" value is treated as an empty list
    tags = row.get("interests", []) or []
    tag_text = ", ".join(tags) if tags else "general travel"

    return Document(
        page_content=f"City: {city}\nInterests: {tag_text}\nGuide: {description}",
        metadata={"city": city, "interests": tags, "source": row.get("source")},
    )

# Convert every raw record, keeping only those that produced a Document
DOCS: List[Document] = [doc for doc in map(_to_doc, raw) if doc]

print(f"📄 Processed {len(DOCS)} travel guide documents")

# In-memory vector store if embeddings are available, else keyword fallback.
# `vectorstore` staying None is the signal retrieve() uses to pick the
# keyword-based search.
embeddings = None
vectorstore = None
if OpenAIEmbeddings is not None and InMemoryVectorStore is not None and os.getenv("OPENAI_API_KEY"):
    try:
        print("🔍 Initializing semantic search with OpenAI embeddings...")
        embed_model = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
        embeddings = OpenAIEmbeddings(model=embed_model)
        vectorstore = InMemoryVectorStore(embedding=embeddings)
        print("⚡ Embedding documents into vector store...")
        vectorstore.add_documents(DOCS)
        print(f"✅ Vector store built with {len(DOCS)} docs using {embed_model}")
    except Exception as e:
        # The store may already be bound (but empty) if embedding the documents
        # failed; reset it so the keyword fallback announced below really
        # takes effect.
        vectorstore = None
        print(f"❌ Embedding init failed, falling back to keywords: {str(e)}")


def retrieve(destination: str, interests: Optional[str], k: int = 3) -> List[Dict[str, Any]]:
    """Return up to ``k`` guide entries relevant to a destination.

    Uses the module-level ``vectorstore`` when one is available and falls back
    to ``_keyword_fallback`` when it is None or when the vector search raises.

    Args:
        destination: Place to search for.
        interests: Optional interests; when truthy they are appended to the
            search query.
        k: Maximum number of results to return.

    Returns:
        A list of dicts with the keys "content", "metadata" and "score".
        An empty list when no documents are loaded.
    """
    if not DOCS:
        print("⚠️ No documents available for retrieval")
        return []

    if interests:
        query = f"{destination} with interests {interests}"
    else:
        query = destination
    print(f"🔎 Searching for: '{query}'")

    if vectorstore is not None:
        print("🚀 Using semantic vector search...")
        try:
            # Ask for at least 4 candidates, then keep the first k
            fetch_count = max(k, 4)
            top_docs = vectorstore.as_retriever(search_kwargs={"k": fetch_count}).invoke(query)[:k]
            # NOTE(review): nothing in this file writes a "score" key into the
            # document metadata, so this presumably always yields 0.0 unless
            # the retriever adds one — confirm.
            results = []
            for doc in top_docs:
                results.append({
                    "content": doc.page_content,
                    "metadata": doc.metadata,
                    "score": float(doc.metadata.get("score", 0.0)),
                })
            print(f"📋 Found {len(results)} relevant documents")
            return results
        except Exception as e:
            print(f"⚠️ Vector search failed, using keyword fallback: {e!s}")
            return _keyword_fallback(destination, interests, k)

    print("📝 Using keyword-based fallback search...")
    return _keyword_fallback(destination, interests, k)


def _keyword_fallback(destination: str, interests: Optional[str], k: int) -> List[Dict[str, Any]]:
    """Rank the loaded documents by simple keyword overlap.

    Scoring per document: +2 when the part of the destination before the first
    comma appears in the document's city, and for each comma-separated interest
    term +1 if it appears in the document's interests metadata and +1 if it
    appears in the page content.

    Args:
        destination: Place to match against each document's city.
        interests: Optional comma-separated interest terms.
        k: Number of top-ranked documents to consider.

    Returns:
        Dicts with "content", "metadata" and "score" for the top ``k``
        documents, excluding those that scored zero and whose city does not
        contain the destination.
    """
    dest_key = destination.lower()
    terms = [piece.strip().lower() for piece in (interests or "").split(",") if piece.strip()]

    def _score(doc: Document) -> int:
        points = 0
        if dest_key and dest_key.split(",")[0] in doc.metadata.get("city", "").lower():
            points = 2
        tags_lower = " ".join(doc.metadata.get("interests") or []).lower()
        body_lower = doc.page_content.lower()
        for term in terms:
            # Each boolean contributes 0 or 1 point
            points += (term in tags_lower) + (term in body_lower)
        return points

    scored = [(_score(doc), doc) for doc in DOCS]
    scored.sort(key=lambda pair: pair[0], reverse=True)

    out: List[Dict[str, Any]] = [
        {"content": doc.page_content, "metadata": doc.metadata, "score": float(points)}
        for points, doc in scored[:k]
        if points > 0 or dest_key in doc.metadata.get("city", "").lower()
    ]

    print(f"📋 Found {len(out)} relevant documents using keyword matching")
    return out

# Report how many guide documents are available to the retriever
print("Docs loaded:", len(DOCS))


In [None]:
# Basic RAG: simple query then LLM synthesis

def _compact(text: str, limit: int = 220) -> str:
    cleaned = " ".join(text.split())
    if len(cleaned) <= limit:
        return cleaned
    truncated = cleaned[:limit].rstrip()
    last_space = truncated.rfind(" ")
    if last_space > 0:
        truncated = truncated[:last_space]
    return truncated.rstrip(",.;- ")


def rag_answer(destination: str, interests: Optional[str] = None) -> str:
    """Answer a travel query with the basic retrieve-then-generate flow.

    Retrieves up to three guide entries, prints a short preview of each, and
    asks the LLM for recommendations using the numbered entries as context.

    Args:
        destination: Place to get recommendations for.
        interests: Optional interests; the prompt uses "local culture" when
            none are given.

    Returns:
        The content of the LLM response.
    """
    print(f"🎯 Starting Basic RAG for {destination} with interests: {interests}")

    # Step 1: Retrieve relevant documents
    matches = retrieve(destination, interests, k=3)

    # Step 2: Show what was retrieved and assemble the numbered context
    if matches:
        print("📖 Retrieved Documents:")
        for idx, match in enumerate(matches, start=1):
            body = match['content']
            # Previews are capped at 100 characters
            preview = body if len(body) <= 100 else body[:100] + "..."
            print(f"  [{idx}] {match['metadata'].get('city', 'Unknown')} (score: {match.get('score', 0):.2f})")
            print(f"      {preview}")

        numbered = []
        for idx, match in enumerate(matches, start=1):
            numbered.append(f"[{idx}] {match['metadata'].get('city')}: {match['content']}")
        context = "\n".join(numbered)
    else:
        print("❌ No relevant documents found")
        context = "No curated context available."

    # Step 3: Generate response using LLM
    print("🤖 Generating response with LLM...")
    prompt_t = (
        "You are a helpful travel assistant.\n"
        "Use the retrieved notes to recommend authentic experiences in {destination} for interests: {interests}.\n"
        "Cite the numbered items when you rely on them.\n\n"
        "Context:\n{context}"
    )
    vars_ = {
        "destination": destination,
        "interests": interests or "local culture",
        "context": context,
    }
    rendered = prompt_t.format(**vars_)
    with using_prompt_template(template=prompt_t, variables=vars_, version="v1"):
        res = llm.invoke([SystemMessage(content=rendered)])

    print("✅ Basic RAG complete!")
    return res.content

# Smoke-test the basic RAG flow with a sample query
divider = "=" * 50
print(divider, "Testing Basic RAG", divider, sep="\n")
result = rag_answer("Lisbon", "food, music")
# Only the first 300 characters of the answer are shown
suffix = "..." if len(result) > 300 else ""
print(f"\n📝 Final Answer:\n{result[:300]}{suffix}")


In [None]:
# Agentic RAG: give the model a retriever tool
from langchain_core.tools import tool

@tool
def local_guides_retrieve(destination: str, interests: Optional[str] = None) -> str:
    """Retrieve curated local guide notes matching destination and optional interests."""
    # NOTE(review): the docstring above is left untouched because @tool
    # presumably exposes it to the model as the tool description — confirm
    # before rewording it.
    print(f"🔧 Tool called: local_guides_retrieve(destination='{destination}', interests='{interests}')")
    found = retrieve(destination, interests, k=3)
    if not found:
        print("  📭 Tool result: No curated context available")
        return "No curated context available."

    print(f"  📦 Tool retrieved {len(found)} documents:")
    for position, hit in enumerate(found, start=1):
        print(f"    [{position}] {hit['metadata'].get('city', 'Unknown')} (score: {hit.get('score', 0):.2f})")

    # One numbered line per hit so the model can cite entries as [1], [2], ...
    numbered_notes = []
    for position, hit in enumerate(found, start=1):
        numbered_notes.append(f"[{position}] {hit['metadata'].get('city')}: {hit['content']}")
    result = "\n".join(numbered_notes)
    print(f"  ✅ Tool returning {len(result)} characters of context")
    return result


def agentic_rag_answer(destination: str, interests: Optional[str] = None) -> str:
    """Answer a travel query by letting the model request retrieval itself.

    The LLM is bound to ``local_guides_retrieve`` and asked to call it. If the
    response contains tool calls, the first one is executed here and its output
    is sent to the plain LLM for a final recommendation. If there are none, the
    model's direct reply is used. Tool-execution errors are printed and the
    direct reply is returned instead (best effort).

    Args:
        destination: Place to get recommendations for.
        interests: Optional interests. They are included in both prompts
            ("local culture" when omitted, as in ``rag_answer``) and are
            supplied as the tool's ``interests`` argument when the model
            leaves that argument out.

    Returns:
        The answer after whitespace normalisation and shortening by
        ``_compact`` with its default limit.
    """
    print(f"🤖 Starting Agentic RAG for {destination} with interests: {interests}")

    # Step 1: Create agent with retriever tool
    tools = [local_guides_retrieve]
    agent = llm.bind_tools(tools)
    print(f"🛠️ Agent equipped with {len(tools)} tool(s): {[t.name for t in tools]}")

    # Step 2: Initial agent call
    # The interests are part of the prompt so the model can pass them on to
    # the tool; previously they were accepted but never reached the model.
    prompt_t = (
        "You are a travel assistant.\n"
        "First, call local_guides_retrieve to get context for {destination} "
        "and these interests: {interests}.\n"
        "Then synthesize concise recommendations citing items like [1], [2]."
    )
    vars_ = {"destination": destination, "interests": interests or "local culture"}
    print("📤 Sending initial prompt to agent...")

    with using_prompt_template(template=prompt_t, variables=vars_, version="v1"):
        res = agent.invoke([SystemMessage(content=prompt_t.format(**vars_))])

    out = res.content
    # Normalise a missing or None attribute to an empty list before using len()
    tool_calls = getattr(res, "tool_calls", None) or []
    print(f"📥 Agent response received (tool_calls: {len(tool_calls)})")

    # Step 3: Handle tool calls if any (only the first call is executed)
    if tool_calls:
        print(f"🔄 Processing {len(tool_calls)} tool call(s)...")
        try:
            call = tool_calls[0]
            # Work on a copy so the model's response message is not mutated
            args = dict(call.get("args") or {})
            print(f"  🎯 Tool call: {call['name']} with args: {args}")

            if call["name"] == "local_guides_retrieve":
                args.setdefault("destination", destination)
                # Fill in the caller's interests when the model omitted them
                if interests and not args.get("interests"):
                    args["interests"] = interests
                tool_result = local_guides_retrieve.invoke(args)  # type: ignore

                print("🤖 Sending context to LLM for synthesis...")
                followup = (
                    f"Using this context, write a concise recommendation for {destination} "
                    f"focused on these interests: {vars_['interests']}.\n"
                    f"Context:\n{tool_result}"
                )
                final = llm.invoke([SystemMessage(content=followup)])
                out = final.content
                print("✅ Final synthesis complete!")
        except Exception as e:
            # Best effort: report the failure and fall through with the
            # model's direct reply.
            print(f"❌ Tool execution failed: {str(e)}")
    else:
        print("ℹ️ No tool calls made by agent")

    print("✅ Agentic RAG complete!")
    return _compact(out)

# Smoke-test the agentic RAG flow with a sample query
divider = "=" * 50
print("", divider, "Testing Agentic RAG", divider, sep="\n")
result = agentic_rag_answer("Prague", "architecture, coffee")
# Only the first 300 characters of the answer are shown
suffix = "..." if len(result) > 300 else ""
print(f"\n📝 Final Answer:\n{result[:300]}{suffix}")


### Demo: Try it out

Use the cells below to run both the Basic RAG and Agentic RAG flows.


In [None]:
# Demo parameters: edit these to try another destination or set of interests
DESTINATION, INTERESTS = "Barcelona", "architecture, food"

print(f"Destination: {DESTINATION}")
print(f"Interests: {INTERESTS}")


In [None]:
# Run the basic RAG flow on the demo parameters and show the full answer
print(
    "🚀 Running Basic RAG Demo...",
    f"Query: {DESTINATION} with interests: {INTERESTS}",
    "-" * 40,
    sep="\n",
)
basic = rag_answer(DESTINATION, INTERESTS)
print("\n🎉 Basic RAG Result:", basic, sep="\n")


In [None]:
# Run the agentic RAG flow on the demo parameters and show its answer
print(
    "\n🤖 Running Agentic RAG Demo...",
    f"Query: {DESTINATION} with interests: {INTERESTS}",
    "-" * 40,
    sep="\n",
)
agentic = agentic_rag_answer(DESTINATION, INTERESTS)
print("\n🎉 Agentic RAG Result:", agentic, sep="\n")


### Tips and Next Steps
- Explore how the backend example composes multiple agents and tools in `backend/main.py`.
- Try swapping embedding models or adding sources to `local_guides.json`.
- With tracing enabled, inspect chains, tools, prompts, and latencies in Arize AX.
