# Notebook 3: RAG as a Tool

Add a fourth tool — `search_docs` — and let the agent decide whether to search the document, use a structured lookup, or do both.

## Setup

In [None]:
import json
import re
import sys
from pathlib import Path

import chromadb
from dotenv import load_dotenv
from openai import OpenAI

sys.path.insert(0, str(Path("..").resolve()))
from tools import TOOL_SCHEMAS, TOOL_FUNCTIONS

# Load variables from a local .env file before the client is created
# (presumably this is where the OpenAI API key comes from — confirm for your setup).
load_dotenv()
client = OpenAI()
# Chat model used by the agent loop.
MODEL = "gpt-4o-mini"
# Embedding model used both to index the chunks and to embed each search query;
# the two must match so query and document vectors are comparable.
EMBEDDING_MODEL = "text-embedding-3-small"
# Number of chunks search_docs asks the vector store for per query.
TOP_K = 5

# System message sent at the start of every agent run. It tells the model when
# to prefer search_docs over the structured tools and not to guess.
SYSTEM_PROMPT = (
    "You are a helpful assistant for AI Agent Insure, a specialty insurer for "
    "AI systems, autonomous agents, and ML infrastructure. "
    "Use search_docs to look up detailed information from the company document. "
    "Use the other tools for structured lookups (product details, pricing, eligibility). "
    "Call as many tools as you need — do not guess when a tool can give you the answer."
)

## Part 1: Build the RAG index

In [None]:
def load_and_chunk(path: Path) -> list[str]:
    """Load a markdown file and split it into sentence-level chunks.

    The text is whitespace-normalized (newlines become single spaces) and then
    split only where sentence-ending punctuation (``.``, ``!`` or ``?``) is
    followed by whitespace. Periods inside tokens such as ``$1.5``, ``v2.0`` or
    ``agent_app.py`` therefore no longer break a sentence apart.

    Args:
        path: Location of the UTF-8 encoded document.

    Returns:
        The non-empty chunks in document order. Each chunk keeps its own
        terminal punctuation; a chunk without any (e.g. trailing text at the
        end of the file) gets a "." appended. An empty file yields ``[]``.
    """
    # str.split() with no argument collapses every whitespace run, so line
    # breaks inside a sentence do not survive into the chunk.
    text = " ".join(path.read_text(encoding="utf-8").split())

    chunks = []
    # The lookbehind keeps the punctuation attached to the sentence it ends.
    for piece in re.split(r"(?<=[.!?])\s+", text):
        # Skip fragments that are empty or consist only of punctuation.
        if not piece.strip(".!? "):
            continue
        chunks.append(piece if piece[-1] in ".!?" else piece + ".")
    return chunks


# Chunk the source document.
data_path = Path("..") / "data" / "AI_Agent_Insure.md"
chunks = load_and_chunk(data_path)
print(f"Loaded {len(chunks)} chunks from {data_path.name}")

# Embed every chunk in a single request and pull the vectors out of the response.
resp = client.embeddings.create(input=chunks, model=EMBEDDING_MODEL)
embeddings = [item.embedding for item in resp.data]

# In-memory Chroma client: the index only lives for this session.
chroma = chromadb.EphemeralClient()
coll = chroma.create_collection("agent_rag")
# Chunk positions ("0", "1", ...) double as the document ids.
coll.add(
    documents=chunks,
    embeddings=embeddings,
    ids=list(map(str, range(len(chunks)))),
)
print("RAG index built and ready.")

## Part 2: Wrap RAG as a tool

In [None]:
def search_docs(query: str) -> str:
    """Embed the query, retrieve the top-k chunks from ChromaDB, return them as one string.

    Args:
        query: Free-text search query, normally written by the model.

    Returns:
        The retrieved chunks joined by a ``---`` separator line, or a short
        explanatory message when the query is blank or nothing was retrieved.
        A message is returned rather than an exception so the agent loop can
        pass it back to the model as an ordinary tool result.
    """
    # The embeddings endpoint rejects empty input; answer locally instead of
    # letting a blank model-generated query crash the agent loop.
    if not query or not query.strip():
        return "No search query was provided. Please call search_docs with a non-empty query."

    q_emb = client.embeddings.create(model=EMBEDDING_MODEL, input=[query])
    q_vec = q_emb.data[0].embedding

    results = coll.query(query_embeddings=[q_vec], n_results=TOP_K)
    # One query vector was sent, so the hits for it are the first (only) entry.
    retrieved = results["documents"][0]

    if not retrieved:
        return "No relevant content found in the document for that query."

    return "\n---\n".join(retrieved)


# Tool schema for search_docs, passed to the chat model via the `tools` argument.
# The "name" must match the key used for search_docs in the dispatch dict, and
# the description is the text the model reads when deciding which tool to call.
SEARCH_DOCS_SCHEMA = {
    "type": "function",
    "function": {
        "name": "search_docs",
        "description": (
            "Search the AI Agent Insure company document for detailed information. "
            "Use this for questions about company background, mission, operational model, "
            "claims process, strategic positioning, or anything not covered by the "
            "other structured tools."
        ),
        # JSON Schema for the arguments: a single required string, `query`,
        # which mirrors the signature search_docs(query: str).
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query to run against the company document."
                }
            },
            "required": ["query"]
        }
    }
}

# Register search_docs next to the imported tools. Both containers are fresh
# copies, so TOOL_SCHEMAS and TOOL_FUNCTIONS themselves are left untouched.
ALL_SCHEMAS = [*TOOL_SCHEMAS, SEARCH_DOCS_SCHEMA]
ALL_FUNCTIONS = dict(TOOL_FUNCTIONS)
ALL_FUNCTIONS["search_docs"] = search_docs

print(f"Agent has {len(ALL_SCHEMAS)} tools: {[schema['function']['name'] for schema in ALL_SCHEMAS]}")

## Part 3: Agent loop — same logic, extended tool set

In [None]:
def _call_tool(fn_name: str, raw_args: str, verbose: bool) -> str:
    """Execute one model-requested tool call and return its result as a string.

    Problems (unknown tool, malformed arguments, a tool that raises) are
    returned as "Error: ..." text instead of propagating, so the caller can
    hand them back to the model as the tool result and let it recover.
    """
    if verbose:
        print(f"  → Tool    : {fn_name}")

    tool = ALL_FUNCTIONS.get(fn_name)
    if tool is None:
        return f"Error: unknown tool '{fn_name}'. Available tools: {sorted(ALL_FUNCTIONS)}."

    try:
        fn_args = json.loads(raw_args) if raw_args else {}
    except json.JSONDecodeError as exc:
        return f"Error: arguments for '{fn_name}' are not valid JSON ({exc})."
    if not isinstance(fn_args, dict):
        return f"Error: arguments for '{fn_name}' must be a JSON object."

    if verbose:
        print(f"    Args    : {fn_args}")

    try:
        result = tool(**fn_args)
    except Exception as exc:
        # Deliberately broad: this is the boundary between model-chosen
        # arguments and arbitrary tool code, and the failure is reported back
        # to the model rather than swallowed.
        return f"Error: tool '{fn_name}' failed with {type(exc).__name__}: {exc}"

    # Tool messages must carry string content; serialize anything else.
    if isinstance(result, str):
        return result
    return json.dumps(result, ensure_ascii=False, default=str)


def run_agent(user_question: str, max_iterations: int = 10, verbose: bool = True) -> str:
    """Run the tool-calling loop over ALL_SCHEMAS until the model gives a final answer.

    Each iteration sends the conversation so far to the chat model. If the
    reply requests tools, every requested call is executed and its result is
    appended as a ``tool`` message; otherwise the reply is treated as final.

    Args:
        user_question: The question to answer.
        max_iterations: Upper bound on model round-trips.
        verbose: Print a trace of iterations, tool calls and result previews.

    Returns:
        The model's final text, or a bracketed status message when the model
        stopped without content or the iteration budget ran out.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user",   "content": user_question}
    ]

    for iteration in range(max_iterations):
        if verbose:
            print(f"--- Iteration {iteration + 1} ---")

        response = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=ALL_SCHEMAS,
            tool_choice="auto",
            temperature=0
        )

        choice = response.choices[0]
        message = choice.message
        messages.append(message)

        if verbose:
            print(f"  finish_reason: {choice.finish_reason}")

        # Decide on the presence of tool calls rather than on finish_reason
        # alone, so requested calls are never dropped and every other finish
        # reason ends the loop instead of resending the same conversation.
        if not message.tool_calls:
            if choice.finish_reason == "stop":
                return message.content or ""
            if message.content:
                return message.content
            return f"[Agent stopped with finish_reason={choice.finish_reason!r} and no content]"

        for tc in message.tool_calls:
            result = _call_tool(tc.function.name, tc.function.arguments, verbose)

            if verbose:
                # Truncate long RAG results so they don't flood the log.
                preview = result[:300] + "..." if len(result) > 300 else result
                print(f"    Result  : {preview}")

            # Every tool_call id must be answered, including failed calls.
            messages.append({
                "role": "tool",
                "tool_call_id": tc.id,
                "content": result
            })

    return f"[Agent stopped after {max_iterations} iterations without a final answer]"

## Part 4: RAG-only question → `search_docs`

In [None]:
# Claims handling is listed in the search_docs description, so this question
# is meant to be answered through search_docs.
answer = run_agent(user_question="How does AI Agent Insure handle claims and incident response?")
print("\nFINAL ANSWER:", answer)

## Part 5: Structured tool question → `get_pricing_estimate`

In [None]:
# A pricing question: meant to be answered by a structured tool rather than search_docs.
answer = run_agent(user_question="What would Agentic Workflow Uptime Insurance cost for an enterprise?")
print("\nFINAL ANSWER:", answer)

## Part 6: Both tools — `search_docs` + `get_pricing_estimate`

In [None]:
# Two-part question: the philosophy half targets search_docs, the cost half a
# structured pricing lookup, so the agent is expected to call both.
answer = run_agent(
    user_question=(
        "Can you explain AI Agent Insure's underwriting philosophy and tell me "
        "what Model & Data Security Insurance would cost for a startup?"
    )
)
print("\nFINAL ANSWER:", answer)

## Part 7: Streamlit app

Everything above is packaged into `../app/agent_app.py`.

```bash
cd 6_Agents/app
streamlit run agent_app.py
```

## Key concepts

- RAG is just a tool — the agent decides when to use it
- Tool descriptions drive routing — the model reads them to choose
- Adding a new tool = one schema + one function in the dispatch dict