# Minimal RAG Agentic AI demo with AutoGen (Python)

### Setup local Python environment.
cd path/to/your/folder

python -m venv venv

venv\Scripts\activate

In [5]:
%pip install -U "autogen-agentchat" "autogen-ext[openai]" chromadb


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
#!pip show autogen-agentchat

In [7]:
# OpenAI API Key.

# For Google Colab environment.
#from google.colab import userdata
#key = userdata.get('OPENAI_API_KEY')

# For local environment.
import os

key = os.getenv("OPENAI_API_KEY")

if not key:
    raise ValueError("API key not found. Please set the MY_API_KEY environment variable.")

print("API Key loaded successfully!")

API Key loaded successfully!


### Simple RAG index with Chroma

This is a tiny, self-contained RAG backend: load files from ./data, chunk text, store in Chroma, and expose a rag_search() function.

In [8]:
import os
import glob
import textwrap
from typing import List

import chromadb
from chromadb.utils import embedding_functions

In [9]:
# ---------- 1) Create a Chroma client & embedding function ----------
chroma_client = chromadb.Client()

embedding_fn = embedding_functions.OpenAIEmbeddingFunction(
    #api_key=os.environ["OPENAI_API_KEY"],
    api_key=key,
    model_name="text-embedding-3-small",
)

COLLECTION_NAME = "demo_docs"
collection = None  # will be created inside index_docs

In [10]:
# ---------- 2) Helper: read all text-like files ----------
def read_file(path: str) -> str:
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        return f.read()


def get_all_files(data_dir: str = "./data") -> List[str]:
    exts = ("*.txt", "*.md")
    files = []
    for ext in exts:
        files.extend(glob.glob(os.path.join(data_dir, ext)))
    return files

In [11]:
# ---------- 3) SAFE chunking implementation ----------
def chunk_text(text: str, max_chars: int = 800, overlap: int = 120) -> List[str]:
    """
    Split text into overlapping chunks, without infinite loops.
    - If text shorter than max_chars: return single chunk.
    - Otherwise: advance forward; for the last chunk, we stop when end == n.
    """
    n = len(text)
    if n <= max_chars:
        return [text]

    chunks = []
    start = 0

    while start < n:
        end = min(start + max_chars, n)
        chunks.append(text[start:end])

        if end == n:
            break  # reached the end safely

        # move forward with overlap
        start = max(0, end - overlap)

    return chunks

In [None]:
# ---------- 4) Index docs into a fresh collection ----------
def index_docs(data_dir: str = "./data"):
    global collection

    files = get_all_files(data_dir)
    if not files:
        raise RuntimeError(f"No .txt/.md files found in {data_dir}")

    # Always recreate the collection for this demo
    try:
        chroma_client.delete_collection(COLLECTION_NAME)
    except Exception:
        pass  # it's fine if it didn't exist yet

    collection = chroma_client.create_collection(
        name=COLLECTION_NAME,
        embedding_function=embedding_fn,
    )

    doc_ids = []
    texts = []
    metadatas = []

    for path in files:
        print(f"Processing {path}")
        raw = read_file(path)
        chunks = chunk_text(raw, max_chars=800, overlap=120)

        for i, ch in enumerate(chunks):
            doc_ids.append(f"{os.path.basename(path)}__{i}")
            texts.append(ch)
            metadatas.append({"source": os.path.basename(path), "chunk": i})

    collection.add(ids=doc_ids, documents=texts, metadatas=metadatas)
    print(f"Indexed {len(texts)} chunks from {len(files)} files.")


# Call this once after your docs are in ./data
index_docs("./data")

### Define a RAG tool (rag_search) for the agent

In [13]:
async def rag_search(query: str, top_k: int = 4) -> str:
    """
    Tool: search internal docs for relevant snippets.
    Returns a concatenated context string the model can use.
    """
    results = collection.query(
        query_texts=[query],
        n_results=top_k,
    )

    docs = results.get("documents", [[]])[0]
    metas = results.get("metadatas", [[]])[0]

    if not docs:
        return "No relevant context found in the knowledge base."

    formatted_chunks = []
    for doc, meta in zip(docs, metas):
        src = meta.get("source", "unknown")
        chunk_id = meta.get("chunk", 0)
        snippet = textwrap.shorten(doc.replace("\n", " "), width=400, placeholder=" ...")
        formatted_chunks.append(f"[{src}#{chunk_id}] {snippet}")

    return "\n".join(formatted_chunks)


### Create the RAG assistant (new autogen_agentchat style)

In [14]:
import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_ext.models.openai import OpenAIChatCompletionClient


In [15]:
# ---------- 1) Model client ----------
model_client = OpenAIChatCompletionClient(
    model="gpt-4.1-mini",   # or gpt-4o, gpt-4.1, etc.
    api_key=key,
)

# ---------- 2) RAG Answering Agent ----------
rag_agent = AssistantAgent(
    name="rag_assistant",
    model_client=model_client,
    system_message=(
        "You are a Q&A assistant over our internal security documentation.\n"
        "- Use the `rag_search` tool to fetch relevant context.\n"
        "- Ground your answers ONLY in that context.\n"
        "- If the answer is not in context, say you don't know.\n"
    ),
    tools=[rag_search],          # <-- our RAG tool
    max_tool_iterations=2,       # let it call the tool at most twice
)


### Helper to pull out the final answer text.

In [16]:
from autogen_agentchat.base import TaskResult

def extract_last_text(result: TaskResult) -> str:
    # Find the last TextMessage in the task result
    for msg in reversed(result.messages):
        if isinstance(msg, TextMessage):
            return msg.content
    return ""


### Optional second agent: Writer / Refiner.

In [17]:
writer_agent = AssistantAgent(
    name="writer_agent",
    model_client=model_client,
    system_message=(
        "You are a senior technical writer.\n"
        "Rewrite answers for the specified audience in clear, simple language.\n"
        "Keep it concise and structured with bullets where useful."
    ),
)


### Refinement helper.

In [18]:
async def refine_for_audience(raw_answer: str,
                              audience: str = "non-technical business stakeholder",
                              max_words: int = 200) -> str:
    prompt = f"""
    Here is an AI-generated answer:

    ---
    {raw_answer}
    ---

    TASK:
    1. Rewrite this for the audience: {audience}.
    2. Use clear, plain language and avoid heavy jargon.
    3. Keep it under {max_words} words.
    4. Use bullets or short paragraphs for readability.
    """

    result = await writer_agent.run(task=textwrap.dedent(prompt).strip())
    return extract_last_text(result)


### Full end-to-end Agentic RAG flow

This is your agentic pipeline without RetrieveUserProxyAgent:

1. rag_agent uses a RAG tool (rag_search) to get context from Chroma.

2. It generates a grounded answer.

3. writer_agent rewrites the answer for a particular audience.

In [19]:
async def rag_then_refine(question: str):
    print("\n" + "#" * 80)
    print("QUESTION:", question)
    print("#" * 80)

    # 1) Ask the RAG assistant
    task = (
        "Answer the following question using ONLY the internal docs. "
        "ALWAYS call the `rag_search` tool first to fetch context.\n\n"
        f"Question: {question}"
    )
    rag_result = await rag_agent.run(task=task)
    raw_answer = extract_last_text(rag_result)

    print("\n--- RAW RAG ANSWER ---\n")
    print(raw_answer)

    # 2) Refine for executives
    refined = await refine_for_audience(
        raw_answer,
        audience="C-level executive with limited technical background",
        max_words=180,
    )

    print("\n--- REFINED FOR EXECUTIVES ---\n")
    print(refined)
    print("\n" + "#" * 80 + "\n")

    return raw_answer, refined


# Run one or two demo questions
async def main():
    await rag_then_refine("What are the main steps in our security review process?")
    await rag_then_refine("Summarize the key benefits of our security policy.")

await main()



################################################################################
QUESTION: What are the main steps in our security review process?
################################################################################

--- RAW RAG ANSWER ---

Answer the following question using ONLY the internal docs. ALWAYS call the `rag_search` tool first to fetch context.

Question: What are the main steps in our security review process?

--- REFINED FOR EXECUTIVES ---

Certainly! Here's a clear, concise version for a C-level executive:

---

**Main Steps in Our Security Review Process**

- **Step 1: Gather Information**  
  We start by collecting all relevant data using a dedicated search tool to ensure we have full context.

- **Step 2: Analyze Risks**  
  Our team reviews the information to identify any potential security risks or vulnerabilities.

- **Step 3: Evaluate Controls**  
  We check existing security measures to see if they effectively address the identified risks.

- **Step 