### Set up a local Python environment
cd path/to/your/folder

python -m venv venv

On Windows:

    venv\Scripts\activate

On macOS/Linux:

    source venv/bin/activate

# Minimal RAG + Agentic AI example using Python + OpenAI.

What it shows:
- Building a tiny in-memory vector store with OpenAI embeddings
- An "agent" (LLM) that decides when to call retrieval
- A simple agent loop: ask -> maybe retrieve -> answer

Prereqs:

    pip install openai numpy

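Set the API key as an environment variable (macOS/Linux shell shown; in Windows cmd use `set OPENAI_API_KEY=sk-...`):

    export OPENAI_API_KEY="sk-..."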

In [None]:
import os
import json
from typing import List, Dict
import numpy as np
from openai import OpenAI

In [None]:
# OpenAI API Key.

# Load the key from Google Colab's secret store when running in Colab;
# otherwise fall back to the OPENAI_API_KEY environment variable so the same
# cell also works in a local environment.
try:
    from google.colab import userdata
except ImportError:
    # Not running in Colab: read the key from the process environment.
    import os

    key = os.getenv("OPENAI_API_KEY")
else:
    key = userdata.get('OPENAI_API_KEY')

# Fail early with a message that names the variable actually being read.
if not key:
    raise ValueError(
        "API key not found. Please set the OPENAI_API_KEY environment variable "
        "(or the OPENAI_API_KEY secret in Colab)."
    )

print("API Key loaded successfully!")

In [None]:
# ---------- OpenAI client and model selection ----------

# Model names referenced by the embedding and chat calls below.
EMBEDDING_MODEL = "text-embedding-3-small"  # small, cheap embeddings
CHAT_MODEL = "gpt-4.1-mini"  # fast, good for tools/agents

# Single shared client, authenticated with the key loaded in the previous cell.
client = OpenAI(api_key=key)


In [None]:
# ---------- 1. Tiny knowledge base (you'd replace this with real docs) ----------

# Each entry has an "id" (the label the agent is told to cite), a short
# "title", and the "text" that is embedded and returned by retrieval.
# Texts are written one sentence per item and joined with single spaces.
KB_DOCS = [
    dict(
        id="doc1",
        title="Claims submission process",
        text=" ".join(
            [
                "Customers must submit claims within 30 days of the incident.",
                "They should provide policy number, date of incident, "
                "and all supporting documents.",
                "Claims can be submitted via the mobile app or the web portal.",
            ]
        ),
    ),
    dict(
        id="doc2",
        title="Fraud detection policy",
        text=" ".join(
            [
                "Suspicious claims are flagged when claim amount is unusually high "
                "compared to customer's historical patterns or when multiple claims "
                "are filed in a short time.",
                "Flagged claims go to the special investigations unit.",
            ]
        ),
    ),
    dict(
        id="doc3",
        title="Refund and cancellation rules",
        text=" ".join(
            [
                "Policyholders can cancel within the first 15 days for a full refund, "
                "provided no claims have been filed.",
                "After that, pro-rated refunds apply.",
            ]
        ),
    ),
]

In [None]:
# ---------- 2. Build simple in-memory vector store ----------

def get_embedding(text: str) -> List[float]:
    """Embed *text* with the configured OpenAI embedding model.

    Sends one single-input request through the module-level ``client`` using
    ``EMBEDDING_MODEL`` and returns the embedding of the first item in the
    response.
    """
    response = client.embeddings.create(input=text, model=EMBEDDING_MODEL)
    first_item = response.data[0]
    return first_item.embedding

# Embed every document once at start-up (one embeddings call per document).
# Row i of KB_EMBEDDINGS and entry i of KB_IDS both correspond to KB_DOCS[i].
print("Building vector store...")
KB_IDS = [entry["id"] for entry in KB_DOCS]
KB_EMBEDDINGS = np.array([get_embedding(entry["text"]) for entry in KB_DOCS])
print(f"Vector store ready with {len(KB_DOCS)} documents.")


In [None]:
# Function to Search.
def search_knowledge_base(query: str, k: int = 2) -> List[Dict]:
    """Return the ``k`` knowledge-base documents most similar to ``query``.

    Similarity is the cosine of the angle between the query embedding and
    each row of ``KB_EMBEDDINGS``.

    Args:
        query: Free-text search query; it is embedded with ``get_embedding``.
        k: Maximum number of documents to return. Values larger than the
            knowledge base return every document.

    Returns:
        A list of dicts with keys ``"id"``, ``"title"``, ``"score"`` (cosine
        similarity as a float) and ``"text"``, ordered from most to least
        similar. Empty when ``k <= 0`` or the knowledge base has no documents.
    """
    # Guard before any API call: with k == 0 the slice [-k:] below would be
    # [-0:], i.e. *every* document, and a negative k yields an arbitrary slice.
    if k <= 0 or not KB_DOCS:
        return []

    q_emb = np.array(get_embedding(query))

    doc_norms = np.linalg.norm(KB_EMBEDDINGS, axis=1)
    q_norm = np.linalg.norm(q_emb)
    # The small epsilon avoids division by zero for an all-zero vector.
    sims = KB_EMBEDDINGS @ q_emb / (doc_norms * q_norm + 1e-8)

    # argsort is ascending: take the last k indices and reverse for best-first.
    top_idx = sims.argsort()[-k:][::-1]

    return [
        {
            "id": KB_DOCS[i]["id"],
            "title": KB_DOCS[i]["title"],
            "score": float(sims[i]),
            "text": KB_DOCS[i]["text"],
        }
        for i in top_idx
    ]

In [None]:
# Format the result.
def format_retrieval_results(results: List[Dict]) -> str:
    """Render retrieved documents as a plain-text context block.

    Each result contributes a header line ``[id] title (score=0.000)``, its
    text, and an empty line; all lines are joined with newlines. An empty
    ``results`` list produces an empty string.
    """
    pieces = []
    for hit in results:
        header = f"[{hit['id']}] {hit['title']} (score={hit['score']:.3f})"
        # The trailing empty string becomes the blank separator line.
        pieces.extend((header, hit["text"], ""))
    return "\n".join(pieces)

In [None]:
# ---------- 3. Agent prompt ----------

# System prompt sent as the first message of every conversation in
# run_rag_agent. It defines a text-based tool protocol: the model either
# replies with a line starting with "CALL_RETRIEVER:" followed by a JSON
# object that has a "query" key, or it answers the user directly.
# NOTE: run_rag_agent detects tool calls by that exact "CALL_RETRIEVER:"
# prefix and reads the "query" key, so keep the prompt and the loop in sync.
AGENT_SYSTEM_PROMPT = """
You are an internal knowledge RAG agent for an insurance company.

You have two modes:

1) If you need company knowledge to answer:
   - Respond *only* with a single line:
     CALL_RETRIEVER: {"query": "<short search query>"}

2) If you already have enough information (including any retrieved context):
   - Answer the user clearly.
   - Cite document IDs like [doc1], [doc2] when using internal knowledge.

Never invent the CALL_RETRIEVER line unless you really need more context.
"""


In [None]:
# ---------- 4. Agent loop (Agentic RAG) ----------

def _parse_retriever_query(reply: str) -> str:
    """Extract the search query from a ``CALL_RETRIEVER: {...}`` reply.

    Only the first JSON value after the marker is decoded, so stray text the
    model appends after the JSON object does not invalidate the call.

    Raises:
        ValueError: if the payload is not a JSON object with a non-empty
            string ``"query"`` field. ``json.JSONDecodeError`` is a subclass
            of ``ValueError``, so malformed JSON is reported the same way.
    """
    payload = reply.split("CALL_RETRIEVER:", 1)[1].strip()
    tool_args, _ = json.JSONDecoder().raw_decode(payload)
    if not isinstance(tool_args, dict):
        raise ValueError("retriever arguments must be a JSON object")
    query = tool_args.get("query")
    if not isinstance(query, str) or not query.strip():
        raise ValueError('retriever arguments need a non-empty string "query"')
    return query


def run_rag_agent(user_question: str, max_tool_loops: int = 2) -> str:
    """Answer ``user_question`` with an agentic RAG loop.

    Flow: user question -> model may request retrieval -> we search the
    knowledge base and append the results as context -> model answers.

    Args:
        user_question: The question to answer.
        max_tool_loops: Maximum number of retrieval rounds. The model is
            queried at most ``max_tool_loops + 1`` times.

    Returns:
        The model's final answer. If a retriever call cannot be parsed, the
        raw model reply is returned as-is. If the model is still requesting
        retrieval after the last allowed step, a fixed apology string is
        returned.
    """
    messages = [
        {"role": "system", "content": AGENT_SYSTEM_PROMPT},
        {"role": "user", "content": user_question},
    ]

    for step in range(max_tool_loops + 1):
        # 1) Ask the model what to do next
        chat_resp = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=messages,
            temperature=0.2,
        )

        # `content` is optional in the API response; treat a missing value as
        # an empty reply instead of failing on None.strip().
        reply = (chat_resp.choices[0].message.content or "").strip()
        print(f"\n--- Agent step {step} ---")
        print("MODEL RAW REPLY:\n", reply)

        # 2) No retriever call: this is the final answer
        if not reply.startswith("CALL_RETRIEVER:"):
            return reply

        # 3) The model asked for retrieval: parse the tool arguments
        try:
            search_query = _parse_retriever_query(reply)
        except ValueError as e:
            # Fallback: treat as normal answer if parsing failed
            print("Failed to parse retriever call:", e)
            return reply

        # No model call follows the last step, so retrieving here would only
        # spend an embedding request on context nobody reads.
        if step == max_tool_loops:
            break

        # Run our retrieval tool
        results = search_knowledge_base(search_query, k=2)
        context_block = format_retrieval_results(results)

        # Record the tool call and its result so the next model call sees both
        messages.append({
            "role": "assistant",
            "content": reply,  # the tool call itself
        })
        messages.append({
            "role": "system",
            "content": f"Retrieved internal knowledge:\n{context_block}",
        })

    return "Sorry, I couldn't complete the reasoning in the allowed steps."


In [None]:
# ---------- 5. Small demo ----------

if __name__ == "__main__":
    # Demo question that combines a late claim (claims deadline rule) with a
    # cancellation refund request (refund rule) from the sample knowledge base.
    question = (
        "A customer filed a claim 40 days after the incident "
        "and wants a refund for cancelling the policy. "
        "What should we tell them? Use internal rules."
    )
    final_answer = run_rag_agent(question)
    # Banner and answer on separate lines, preceded by a blank line.
    print("\n=== FINAL ANSWER ===", final_answer, sep="\n")