# RAG Agent

The RAG (Retrieval Augmented Generation):

1. Indexing: Loading documents, chunking them, and storing them in a vector database

2. Retrieval: Finding and using relevant chunks during conversation runtime

Note: The quality of a RAG system depends on factors like chunking and retrieval (models, embeddings, etc.). You may have to experiment with those to get the best results.

## Simple RAG Agent

In [2]:
# source: https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/memory.html#:~:text=To%20begin%2C%20let%E2%80%99s%20create%20a%20simple%20document%20indexer%20that%20we%20will%20used%20to%20load%20documents%2C%20chunk%20them%2C%20and%20store%20them%20in%20a%20ChromaDBVectorMemory%20memory%20store.
import re
from typing import List

import aiofiles
import aiohttp
from autogen_core.memory import Memory, MemoryContent, MemoryMimeType


class SimpleDocumentIndexer:
    """Basic document indexer for AutoGen Memory."""

    def __init__(self, memory: Memory, chunk_size: int = 1500) -> None:
        self.memory = memory
        self.chunk_size = chunk_size

    async def _fetch_content(self, source: str) -> str:
        """Fetch content from URL or file."""
        if source.startswith(("http://", "https://")):
            async with aiohttp.ClientSession() as session:
                async with session.get(source) as response:
                    return await response.text()
        else:
            async with aiofiles.open(source, "r", encoding="utf-8") as f:
                return await f.read()

    def _strip_html(self, text: str) -> str:
        """Remove HTML tags and normalize whitespace."""
        text = re.sub(r"<[^>]*>", " ", text)
        text = re.sub(r"\s+", " ", text)
        return text.strip()

    def _split_text(self, text: str) -> List[str]:
        """Split text into fixed-size chunks."""
        chunks: list[str] = []
        # Just split text into fixed-size chunks
        for i in range(0, len(text), self.chunk_size):
            chunk = text[i : i + self.chunk_size]
            chunks.append(chunk.strip())
        return chunks

    async def index_documents(self, sources: List[str]) -> int:
        """Index documents into memory."""
        total_chunks = 0

        for source in sources:
            try:
                content = await self._fetch_content(source)

                # Strip HTML if content appears to be HTML
                if "<" in content and ">" in content:
                    content = self._strip_html(content)

                chunks = self._split_text(content)

                for i, chunk in enumerate(chunks):
                    await self.memory.add(
                        MemoryContent(
                            content=chunk, mime_type=MemoryMimeType.TEXT, metadata={"source": source, "chunk_index": i}
                        )
                    )

                total_chunks += len(chunks)

            except Exception as e:
                print(f"Error indexing {source}: {str(e)}")

        return total_chunks


In [6]:
import os
from pathlib import Path

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.ui import Console
from autogen_ext.memory.chromadb import ChromaDBVectorMemory, PersistentChromaDBVectorMemoryConfig
from autogen_ext.models.openai import OpenAIChatCompletionClient

# Initialize vector memory

rag_memory = ChromaDBVectorMemory(
    config=PersistentChromaDBVectorMemoryConfig(
        collection_name="autogen_docs",
        persistence_path=os.path.join(str(Path.home()), ".chromadb_autogen"),
        k=3,  # Return top 3 results
        score_threshold=0.4,  # Minimum similarity score
    )
)

await rag_memory.clear()  # Clear existing memory


# Index AutoGen documentation
indexer = SimpleDocumentIndexer(memory=rag_memory)
sources = [
    "https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/memory.html",
    "https://pypi.org/project/autogenstudio/",
    "https://pypi.org/project/autogen-agentchat/",
    "https://pypi.org/project/autogen-core/",
    "https://pypi.org/project/autogen-ext/",
    "https://microsoft.github.io/autogen/0.2/",
    "https://microsoft.github.io/autogen/autogen/user-guide/autogenstudio-user-guide/index.html",
    "https://microsoft.github.io/autogen/autogen/user-guide/agentchat-user-guide/index.html",
    "https://microsoft.github.io/autogen/autogen/user-guide/core-user-guide/index.html",
    "https://microsoft.github.io/autogen/autogen/user-guide/extensions-user-guide/index.html",
    "https://microsoft.github.io/autogen/autogen/reference/index.html",
    "https://microsoft.github.io/autogen/autogen/user-guide/agentchat-user-guide/migration-guide.html",
    "https://microsoft.github.io/autogen/autogen/user-guide/agentchat-user-guide/quickstart.html",
    "https://microsoft.github.io/autogen/autogen/user-guide/core-user-guide/quickstart.html",
]
chunks: int = await indexer.index_documents(sources)
print(f"Indexed {chunks} chunks from {len(sources)} AutoGen documents")

Indexed 72 chunks from 14 AutoGen documents


In [None]:
# Create our RAG assistant agent
rag_assistant = AssistantAgent(
    name="rag_assistant", model_client=OpenAIChatCompletionClient(model="gpt-5"), memory=[rag_memory]
)

# Ask questions about AutoGen
stream = rag_assistant.run_stream(task="Basic RAG agent in autogen?")
await Console(stream)

# Remember to close the memory when done
await rag_memory.close()

---------- TextMessage (user) ----------
Basic RAG agent in autogen?
---------- MemoryQueryEvent (rag_assistant) ----------
[MemoryContent(content='rom 4 AutoGen documents # Create our RAG assistant agent rag_assistant = AssistantAgent ( name = &quot;rag_assistant&quot; , model_client = OpenAIChatCompletionClient ( model = &quot;gpt-4o&quot; ), memory = [ rag_memory ] ) # Ask questions about AutoGen stream = rag_assistant . run_stream ( task = &quot;What is AgentChat?&quot; ) await Console ( stream ) # Remember to close the memory when done await rag_memory . close () ---------- TextMessage (user) ---------- What is AgentChat? ---------- MemoryQueryEvent (rag_assistant) ---------- [MemoryContent(content=&#39;e of the AssistantAgent , we can now proceed to the next section to learn about the teams feature in AgentChat. previous Messages next Teams On this page Assistant Agent Getting Result Multi-Modal Input Streaming Messages Using Tools and Workbench Built-in Tools and Workbench Funct