# RAG in an agent


# Setup

In [1]:
from typing import Any


from conversational_toolkit.llms.base import LLMMessage, Roles
from conversational_toolkit.tools.base import Tool
from conversational_toolkit.chunking.base import Chunk
from conversational_toolkit.llms.openai import OpenAILLM
from conversational_toolkit.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from conversational_toolkit.retriever.vectorstore_retriever import VectorStoreRetriever
from conversational_toolkit.agents.tool_agent import ToolAgent, QueryWithContext

from sme_kt_zh_collaboration_rag.feature0_baseline_rag import (
    load_chunks,
    build_llm,
    build_vector_store,
    VS_PATH,
    EMBEDDING_MODEL,
)

Consider using the pymupdf_layout package for a greatly improved page layout analysis.


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Only a subset of the corpus is chunked here (max_files=5).
chunks = load_chunks(max_files=5)
embedding_model = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)
# NOTE(review): reset=False presumably reuses an existing store at VS_PATH
# instead of rebuilding it — confirm in build_vector_store.
db_chroma = await build_vector_store(
    chunks, embedding_model, db_path=VS_PATH, reset=False
)
# Despite its name, `vector_store` is a VectorStoreRetriever (not the raw store);
# it is the object handed to the retrieval tool further down.
# top_k=5 is presumably the number of chunks returned per query — confirm.
vector_store = VectorStoreRetriever(embedding_model, db_chroma, top_k=5)

2026-02-26 17:51:17.024 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:202 - Chunking 5 files from C:\Users\sieverin\SDSC\Code\sme-kt-zh-collaboration-rag\data


5


2026-02-26 17:51:17.285 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_internal_procurement_policy.pdf: 12 chunks
2026-02-26 17:51:17.469 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_logylight_incomplete_datasheet.pdf: 6 chunks
2026-02-26 17:51:17.569 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_product_catalog.pdf: 7 chunks
2026-02-26 17:51:17.578 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_product_overview.xlsx: 1 chunks
2026-02-26 17:51:17.674 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_relicyc_logypal1_datasheet_2021.pdf: 5 chunks
2026-02-26 17:51:17.674 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:218 - Done, 31 chunks total
2026-02-26 17:51:19.708 | DEBUG    | conversational_toolkit.embeddings.sentence_transformer:__init__:57 - Sentence Transfo

In [4]:
# NOTE(review): `llm` is rebound in the "Define the Agent" section to an
# OpenAILLM configured with tools, so the instance built here is not the one
# the agent ends up using.
llm = build_llm(backend="openai")

2026-02-26 17:51:19.914 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:build_llm:137 - LLM backend: OpenAI (gpt-4o-mini)
2026-02-26 17:51:20.127 | DEBUG    | conversational_toolkit.llms.openai:__init__:63 - OpenAI LLM loaded: gpt-4o-mini; temperature: 0.3; seed: 42; tools: None; tool_choice: None; response_format: {'type': 'text'}


In [5]:
def chunks_to_text(chunks: list[Chunk]) -> str:
    """Render chunks as a single Markdown-style string for the LLM.

    Each chunk becomes a ``## Chunk <title>:`` heading followed by its content
    inside a fenced block. Consecutive chunks are separated by a rule of 30
    dashes and a blank line; nothing is emitted after the last chunk.

    Args:
        chunks: Chunks to render. Only ``title`` and ``content`` are read.

    Returns:
        The rendered text, or an empty string when ``chunks`` is empty.
    """
    separator = "-" * 30 + "\n\n"
    sections = [
        f"## Chunk {chunk.title}:\n```\n{chunk.content}\n```\n" for chunk in chunks
    ]
    # Join instead of appending a separator after every chunk and slicing off a
    # fixed 4 characters: that slice only removed "--\n\n" and left a dangling
    # 28-dash rule at the end of the text.
    return separator.join(sections)

In [6]:
class RetrieveRelevantChunks(Tool):
    """Tool that answers a query by looking up chunks through a retriever.

    The retrieved chunks are rendered to text with ``chunks_to_text`` and
    returned under the ``"result"`` key.
    """

    def __init__(
        self, name: str, description: str, parameters: dict[str, Any], retriever
    ):
        """Store the tool metadata and the retriever to query.

        Args:
            name: Identifier of the tool.
            description: Human-readable description of what the tool does.
            parameters: Schema describing the arguments ``call`` expects.
            retriever: Object exposing an awaitable ``retrieve(query)`` method.
        """
        self.name = name
        self.description = description
        self.parameters = parameters
        self.retriever = retriever

    async def call(self, args: dict[str, Any]) -> dict[str, Any]:
        """Retrieve chunks for ``args["query"]`` and return them as text.

        Args:
            args: Tool arguments; the ``"query"`` entry is read with ``.get``,
                so a missing key is forwarded to the retriever as ``None``.

        Returns:
            ``{"result": [text]}`` where ``text`` is the rendered chunk list
            (a one-element list).
        """
        query = args.get("query")
        matching_chunks = await self.retriever.retrieve(query)
        return {"result": [chunks_to_text(matching_chunks)]}

In [7]:
# Instantiate the retrieval tool that is later passed to the LLM via `tools=[...]`.
retriever_tool = RetrieveRelevantChunks(
    name="retrieve_relevant_chunks",
    description="Retrieves the most relevant chunks based on a query.",
    # Argument schema (JSON-Schema style): a single required string `query`,
    # with no other properties allowed.
    parameters={
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The query to retrieve relevant chunks for.",
            },
        },
        "required": ["query"],
        "additionalProperties": False,
    },
    # `vector_store` is the VectorStoreRetriever built in the setup section.
    retriever=vector_store,
)

# Define the Agent

In [8]:
# Rebinds `llm` (previously assigned from build_llm) to an OpenAILLM that is
# given the retrieval tool. NOTE(review): per the logged configuration this
# instance runs with temperature 0.5, whereas the build_llm one logged 0.3 —
# confirm the difference is intended.
llm = OpenAILLM(tool_choice="auto", tools=[retriever_tool])

# Define the prompt
prompt = "You are a helpful assistant, answer shortly. Use the tools only when they are relevant, but if you do so, trust the results from the tools and use them in your answer, cite them precisely if you use them."
# The same prompt wrapped as a SYSTEM message; it seeds `conversation` below.
prompt_as_message = LLMMessage(content=prompt, role=Roles.SYSTEM)

2026-02-26 17:51:20.374 | DEBUG    | conversational_toolkit.llms.openai:__init__:63 - OpenAI LLM loaded: gpt-4o-mini; temperature: 0.5; seed: 42; tools: [<__main__.RetrieveRelevantChunks object at 0x0000023D47757D70>]; tool_choice: auto; response_format: {'type': 'text'}


In [9]:
# max_steps=5 presumably caps the number of LLM/tool round-trips per answer —
# confirm in ToolAgent.
rag_agent = ToolAgent(system_prompt=prompt, llm=llm, max_steps=5)

# Test the agent

## First Simple Question

In [10]:
# Running history that later queries pass as `history`; seeded with the system message.
# NOTE(review): the agent was also constructed with `system_prompt=prompt` —
# confirm the system prompt is not sent twice once this list is used as history.
conversation = [prompt_as_message]

In [11]:
# Control question unrelated to the indexed documents: per the system prompt
# the agent should answer it without using the retrieval tool.
query = "What is a Einstein theory of relativity in the context of physics?"
query_as_message = LLMMessage(content=query, role=Roles.USER)
# NOTE(review): this first turn is sent with an empty history, not `conversation`
# (which already holds the system message) — later turns pass `conversation`.
query_with_context = QueryWithContext(query=query, history=[])

answer = await rag_agent.answer(query_with_context)

2026-02-26 17:51:25.824 | DEBUG    | conversational_toolkit.agents.tool_agent:answer_stream:106 - [{'content': "Einstein's theory of relativity consists of two main parts: special relativity and general relativity.\n\n1. **Special Relativity (1905)**: This theory addresses the physics of objects moving at constant speeds, particularly at speeds close to the speed of light. It introduces the concepts of time dilation (time moving slower for objects in motion relative to a stationary observer) and length contraction (objects appearing shorter in the direction of motion). A key result of special relativity is the equation \\(E=mc^2\\), which expresses the equivalence of mass and energy.\n\n2. **General Relativity (1915)**: This theory extends the principles of special relativity to include acceleration and gravity. It posits that gravity is not a force in the traditional sense but a curvature of spacetime caused by mass. Massive objects like planets and stars warp the fabric of spacetime,

In [12]:
# Show only the assistant's answer text from the first turn.
print(answer.content)

Einstein's theory of relativity consists of two main parts: special relativity and general relativity.

1. **Special Relativity (1905)**: This theory addresses the physics of objects moving at constant speeds, particularly at speeds close to the speed of light. It introduces the concepts of time dilation (time moving slower for objects in motion relative to a stationary observer) and length contraction (objects appearing shorter in the direction of motion). A key result of special relativity is the equation \(E=mc^2\), which expresses the equivalence of mass and energy.

2. **General Relativity (1915)**: This theory extends the principles of special relativity to include acceleration and gravity. It posits that gravity is not a force in the traditional sense but a curvature of spacetime caused by mass. Massive objects like planets and stars warp the fabric of spacetime, causing other objects to follow curved paths, which we perceive as gravitational attraction.

Together, these theorie

In [13]:
# Record the first turn (user message + answer) in the running history.
# Not idempotent: re-running this cell appends the same pair again.
conversation += [query_as_message, answer]

## Our Question

In [14]:
# Domain question about the indexed documents; the previous turns are passed
# along as history.
query = "Which pallets in our portfolio have a third-party verified EPD?"
query_as_message = LLMMessage(content=query, role=Roles.USER)
query_with_context = QueryWithContext(query=query, history=conversation)

answer = await rag_agent.answer(query_with_context)

2026-02-26 17:51:27.052 | DEBUG    | conversational_toolkit.embeddings.sentence_transformer:get_embeddings:76 - sentence-transformers/all-MiniLM-L6-v2 embeddings size: (1, 384)
2026-02-26 17:51:28.998 | DEBUG    | conversational_toolkit.agents.tool_agent:answer_stream:106 - [{'content': '', 'tool_calls': [ToolCall(id='call_eXUL3QnVyoKallDnATIkqckp', function=Function(name='retrieve_relevant_chunks', arguments='{"query":"pallets in portfolio with third-party verified EPD"}'), type='function')], 'role': <Roles.ASSISTANT: 'assistant'>, 'function_name': 'llm'}, {'result': ['## Chunk ## Q1: portfolio_scope:\n```\n## Q1: portfolio_scope\n\n**Question:** Does PrimePack AG offer a product called the "Lara Pallet"?\n\n**Expected answer:** No. The Lara Pallet is not part of PrimePack AG\'s portfolio. The product catalog explicitly lists it under products that are *not* offered. The active pallet portfolio consists of: Noé Pallet (32-100, CPR System), Wooden Pallet 1208 (32-101, CPR System), Recy

In [15]:
# Show only the final answer text; the debug log above also contains the
# tool call and the retrieved chunks.
print(answer.content)

The pallets in your portfolio that have a third-party verified Environmental Product Declaration (EPD) are:

1. **Logypal 1** - This pallet is made from 100% recycled plastic content.

These EPDs have been verified by accredited certification bodies, ensuring compliance with ISO 14025:2006 standards.


In [16]:
# Record the second turn in the running history.
# Not idempotent: re-running this cell appends the same pair again.
conversation += [query_as_message, answer]

## Test Memory

In [17]:
# Memory check: ask the agent to summarise the turns accumulated in `conversation`.
query = "Summarize the conversation (some words per message)?"
query_as_message = LLMMessage(content=query, role=Roles.USER)
query_with_context = QueryWithContext(query=query, history=conversation)

answer = await rag_agent.answer(query_with_context)

2026-02-26 17:51:31.498 | DEBUG    | conversational_toolkit.agents.tool_agent:answer_stream:106 - [{'content': "1. **User**: Asked about Einstein's theory of relativity in physics.\n2. **Assistant**: Explained special relativity (time dilation, length contraction, \\(E=mc^2\\)) and general relativity (gravity as spacetime curvature).\n3. **User**: Inquired about pallets with third-party verified EPD.\n4. **Assistant**: Listed Logypal 1 as the pallet with a third-party verified EPD made from 100% recycled plastic.", 'tool_calls': [], 'role': <Roles.ASSISTANT: 'assistant'>, 'function_name': 'llm'}]


In [18]:
# Show the summary text produced for the memory check.
print(answer.content)

1. **User**: Asked about Einstein's theory of relativity in physics.
2. **Assistant**: Explained special relativity (time dilation, length contraction, \(E=mc^2\)) and general relativity (gravity as spacetime curvature).
3. **User**: Inquired about pallets with third-party verified EPD.
4. **Assistant**: Listed Logypal 1 as the pallet with a third-party verified EPD made from 100% recycled plastic.


----------------