# RAG as subagent

# Setup

In [None]:
from typing import Any


from conversational_toolkit.llms.base import LLMMessage, Roles
from conversational_toolkit.tools.base import Tool
from conversational_toolkit.chunking.base import Chunk
from conversational_toolkit.llms.openai import OpenAILLM
from conversational_toolkit.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from conversational_toolkit.retriever.vectorstore_retriever import VectorStoreRetriever
from conversational_toolkit.agents.tool_agent import ToolAgent, QueryWithContext

from sme_kt_zh_collaboration_rag.feature0_baseline_rag import (
    load_chunks,
    build_llm,
    build_vector_store,
    VS_PATH,
    EMBEDDING_MODEL,
)

In [6]:
# Chunk a small sample of the corpus; the log output reports 5 files being chunked.
chunks = load_chunks(max_files=5)
# One embedding model instance is shared by the vector store and the retriever.
embedding_model = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)
# Top-level `await` works here because the notebook kernel runs an event loop.
# NOTE(review): reset=False presumably reuses an existing store at VS_PATH
# instead of rebuilding it -- confirm against build_vector_store.
db_chroma = await build_vector_store(
    chunks, embedding_model, db_path=VS_PATH, reset=False
)
# Despite its name, `vector_store` is a retriever wrapping the store.
# top_k=5 presumably caps the number of chunks returned per query -- confirm.
vector_store = VectorStoreRetriever(embedding_model, db_chroma, top_k=5)

2026-02-26 17:58:52.249 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:202 - Chunking 5 files from C:\Users\sieverin\SDSC\Code\sme-kt-zh-collaboration-rag\data


5


2026-02-26 17:58:52.525 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_internal_procurement_policy.pdf: 12 chunks
2026-02-26 17:58:52.705 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_logylight_incomplete_datasheet.pdf: 6 chunks
2026-02-26 17:58:52.805 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_product_catalog.pdf: 7 chunks
2026-02-26 17:58:52.819 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_product_overview.xlsx: 1 chunks
2026-02-26 17:58:52.911 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_relicyc_logypal1_datasheet_2021.pdf: 5 chunks
2026-02-26 17:58:52.912 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:218 - Done, 31 chunks total
2026-02-26 17:58:54.707 | DEBUG    | conversational_toolkit.embeddings.sentence_transformer:__init__:57 - Sentence Transfo

In [7]:
# Build the baseline LLM client through the project helper.
# NOTE(review): `llm` is rebound to a tool-enabled OpenAILLM in a later cell
# before any visible cell uses this instance, so this cell looks redundant.
llm = build_llm(backend="openai")

2026-02-26 17:58:54.770 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:build_llm:137 - LLM backend: OpenAI (gpt-4o-mini)
2026-02-26 17:58:54.978 | DEBUG    | conversational_toolkit.llms.openai:__init__:63 - OpenAI LLM loaded: gpt-4o-mini; temperature: 0.3; seed: 42; tools: None; tool_choice: None; response_format: {'type': 'text'}


In [8]:
def chunks_to_text(chunks: list[Chunk]) -> str:
    """Render chunks as a single Markdown-style string for an LLM prompt.

    Each chunk becomes a ``## Chunk <title>:`` heading followed by its content
    in a fenced block. Consecutive chunks are separated by a rule of 30 dashes
    and a blank line.

    Args:
        chunks: Chunks to render; only ``title`` and ``content`` are read.

    Returns:
        The rendered text, or an empty string when ``chunks`` is empty. No
        separator follows the last chunk.
    """
    separator = "-" * 30 + "\n\n"
    rendered = [
        f"## Chunk {chunk.title}:\n```\n{chunk.content}\n```\n" for chunk in chunks
    ]
    # join() places the separator only between chunks. The previous
    # `text[:-4]` slice removed just 4 of the 32 trailing separator characters
    # and left a dangling rule of 28 dashes after the last chunk.
    return separator.join(rendered)

In [9]:
class RetrieveRelevantChunks(Tool):
    """Tool that fetches chunks for a query and hands them back as text.

    The retriever is supplied by the caller; retrieved chunks are rendered
    with ``chunks_to_text``.
    """

    def __init__(
        self, name: str, description: str, parameters: dict[str, Any], retriever
    ):
        """Store the tool metadata and the retriever used by ``call``."""
        self.name, self.description, self.parameters = name, description, parameters
        self.retriever = retriever

    async def call(self, args: dict[str, Any]) -> dict[str, Any]:
        """Retrieve chunks for ``args["query"]`` and render them.

        Returns:
            A dict whose ``"result"`` entry is a one-element list holding the
            rendered text for the query.
        """
        search_query = args.get("query")
        matches = await self.retriever.retrieve(search_query)
        return {"result": [chunks_to_text(matches)]}

In [10]:
# Tool instance handed to the RAG subagent's LLM. `name`, `description` and
# `parameters` are the metadata stored on the tool; `retriever` performs the
# actual lookup.
retriever_tool = RetrieveRelevantChunks(
    name="retrieve_relevant_chunks",
    description="Retrieves the most relevant chunks based on a query.",
    # JSON-Schema-style description of the arguments: a single required
    # string field "query", with no extra properties allowed.
    parameters={
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The query to retrieve relevant chunks for.",
            },
        },
        "required": ["query"],
        "additionalProperties": False,
    },
    # Named `vector_store` in the setup cell, but it is a VectorStoreRetriever.
    retriever=vector_store,
)

# Set up the RAG subagent

In [11]:
# LLM for the RAG subagent: it is given the retriever tool, and
# tool_choice="auto" leaves the decision to call it to the model.
# This rebinds the `llm` name created by the earlier build_llm() cell.
llm = OpenAILLM(tool_choice="auto", tools=[retriever_tool])

# System prompt for the subagent: use tools only when relevant, trust their
# results, and cite them.
prompt = "You are a helpful assistant, answer shortly. Use the tools only when they are relevant, but if you do so, trust the results from the tools and use them in your answer, cite them precisely if you use them."
# NOTE(review): `prompt_as_message` is not read by any visible cell; the
# subagent receives the raw `prompt` string instead.
prompt_as_message = LLMMessage(content=prompt, role=Roles.SYSTEM)

2026-02-26 17:58:55.218 | DEBUG    | conversational_toolkit.llms.openai:__init__:63 - OpenAI LLM loaded: gpt-4o-mini; temperature: 0.5; seed: 42; tools: [<__main__.RetrieveRelevantChunks object at 0x000001D38C02B2F0>]; tool_choice: auto; response_format: {'type': 'text'}


In [12]:
# The RAG subagent: a ToolAgent driven by the tool-enabled `llm`.
# max_steps=5 presumably bounds the number of tool-calling rounds -- confirm.
rag_agent = ToolAgent(system_prompt=prompt, llm=llm, max_steps=5)

# Define it as a tool

In [13]:
class RAGAgentAsTool(Tool):
    """Expose a RAG agent as a tool that another agent can call.

    The wrapped agent can be injected through ``agent``. When it is omitted,
    the notebook-level ``rag_agent`` is used, so existing call sites that pass
    only the tool metadata keep working.
    """

    def __init__(
        self,
        name: str,
        description: str,
        parameters: dict[str, Any],
        agent: Any = None,
    ):
        """Store the tool metadata and, optionally, the agent to delegate to.

        Args:
            name: Tool name.
            description: Tool description.
            parameters: Schema of the arguments accepted by ``call``.
            agent: Object with an async ``answer(query_with_context=...)``
                method. ``None`` falls back to the global ``rag_agent``.
        """
        self.name = name
        self.description = description
        self.parameters = parameters
        self.agent = agent

    async def call(self, args: dict[str, Any]) -> dict[str, Any]:
        """Forward ``args["query"]`` to the agent and return its answer as text.

        The agent is called with an empty history, so it sees only the query.

        Returns:
            A dict whose ``"result"`` entry is ``str(answer.content)``.
        """
        query = args.get("query")

        # Resolve the fallback at call time so the global only needs to exist
        # once the tool is actually invoked.
        agent = self.agent if self.agent is not None else rag_agent

        answer = await agent.answer(
            query_with_context=QueryWithContext(query=query, history=[])
        )

        return {"result": str(answer.content)}


# Tool instance handed to the main agent's LLM. The description is what the
# model reads when deciding whether to delegate, so it must name the domain
# correctly: the corpus is about pallets, not "pellets".
rag_agent_as_tool_tool = RAGAgentAsTool(
    name="rag_agent_as_tool",
    description="Uses the RAG agent to answer queries about pallets, boxes and so on.",
    parameters={
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                # The subagent is invoked with an empty history, so the query
                # has to carry all the context it needs.
                "description": "A self-contained question for the RAG agent to answer.",
            },
        },
        "required": ["query"],
        "additionalProperties": False,
    },
)

# Define the Main Agent


In [14]:
# LLM for the main agent: its only tool is the RAG subagent wrapper, and
# tool_choice="auto" leaves the decision to delegate to the model.
llm_main_agent = OpenAILLM(tool_choice="auto", tools=[rag_agent_as_tool_tool])

# System prompt for the main agent. Unlike the subagent prompt, it carries no
# instructions about tools or citing sources.
prompt_main_agent = "You are a helpful assistant, answer shortly."
# The same prompt as an LLMMessage; a later cell uses it to seed `conversation`.
prompt_as_message_main_agent = LLMMessage(content=prompt_main_agent, role=Roles.SYSTEM)

# max_steps=5 presumably bounds the number of tool-calling rounds -- confirm.
main_agent = ToolAgent(system_prompt=prompt_main_agent, llm=llm_main_agent, max_steps=5)

2026-02-26 17:58:55.463 | DEBUG    | conversational_toolkit.llms.openai:__init__:63 - OpenAI LLM loaded: gpt-4o-mini; temperature: 0.5; seed: 42; tools: [<__main__.RAGAgentAsTool object at 0x000001D38C2904A0>]; tool_choice: auto; response_format: {'type': 'text'}


# Test the agent

## First Simple Question

In [15]:
# Running conversation history, seeded with the main agent's system message.
# NOTE(review): main_agent also receives the same text via system_prompt;
# check that the system prompt is not sent twice when this list is passed as
# history.
conversation = [prompt_as_message_main_agent]

In [16]:
# Off-topic question; in the recorded run the log shows a direct answer.
query = "What is a Einstein theory of relativity in the context of physics?"
# Kept as a message so the turn can be appended to `conversation` afterwards.
query_as_message = LLMMessage(content=query, role=Roles.USER)
# NOTE(review): this turn passes history=[] while the later turns pass
# history=conversation -- confirm the difference is intentional.
query_with_context = QueryWithContext(query=query, history=[])

answer = await main_agent.answer(query_with_context)

2026-02-26 17:59:00.222 | DEBUG    | conversational_toolkit.agents.tool_agent:answer_stream:106 - [{'content': "Einstein's theory of relativity consists of two main parts: special relativity and general relativity.\n\n1. **Special Relativity (1905)**: This theory addresses the physics of objects moving at constant speeds, particularly at speeds close to the speed of light. It introduces the concepts of time dilation (time moving slower for objects in motion compared to those at rest) and length contraction (objects appearing shorter in the direction of motion). It also establishes the famous equation \\(E=mc^2\\), which shows the equivalence of mass and energy.\n\n2. **General Relativity (1915)**: This theory extends the principles of special relativity to include acceleration and gravity. It describes gravity not as a force but as the curvature of spacetime caused by mass. Massive objects like planets and stars warp the fabric of spacetime, causing other objects to follow curved paths

In [17]:
# print() renders the line breaks in the answer text; a bare expression would
# show the escaped repr instead.
print(answer.content)

Einstein's theory of relativity consists of two main parts: special relativity and general relativity.

1. **Special Relativity (1905)**: This theory addresses the physics of objects moving at constant speeds, particularly at speeds close to the speed of light. It introduces the concepts of time dilation (time moving slower for objects in motion compared to those at rest) and length contraction (objects appearing shorter in the direction of motion). It also establishes the famous equation \(E=mc^2\), which shows the equivalence of mass and energy.

2. **General Relativity (1915)**: This theory extends the principles of special relativity to include acceleration and gravity. It describes gravity not as a force but as the curvature of spacetime caused by mass. Massive objects like planets and stars warp the fabric of spacetime, causing other objects to follow curved paths, which we perceive as gravitational attraction.

Together, these theories revolutionized our understanding of space, 

In [18]:
# Record the turn (user message + answer) in the history.
# This extends the list in place, so re-running the cell appends the same
# turn again.
conversation += [query_as_message, answer]

## Our Question

In [19]:
# Domain question; in the recorded run the log shows the
# retrieve_relevant_chunks tool being called for it.
query = "Which pallets in our portfolio have a third-party verified EPD?"
query_as_message = LLMMessage(content=query, role=Roles.USER)
# Pass the accumulated history (system message + first turn) along.
query_with_context = QueryWithContext(query=query, history=conversation)

answer = await main_agent.answer(query_with_context)

2026-02-26 17:59:02.130 | DEBUG    | conversational_toolkit.embeddings.sentence_transformer:get_embeddings:76 - sentence-transformers/all-MiniLM-L6-v2 embeddings size: (1, 384)
2026-02-26 17:59:05.565 | DEBUG    | conversational_toolkit.agents.tool_agent:answer_stream:106 - [{'content': '', 'tool_calls': [ToolCall(id='call_XDYNXLXKWU1Z1cVXWwtEE2TL', function=Function(name='retrieve_relevant_chunks', arguments='{"query":"third-party verified EPD pallets"}'), type='function')], 'role': <Roles.ASSISTANT: 'assistant'>, 'function_name': 'llm'}, {'result': ['## Chunk ### THIRD-PARTY VERIFICATION:\n```\n### THIRD-PARTY VERIFICATION\n\nIndependent third-party verification of the declaration and data,\n\naccording to ISO 14025:2006, via:\n\n [X] EPD verification by accredited certification body\n\n Third-party verification: Bureau Veritas Italia S.p.A. is an approved certification\n\nbody accountable for the third-party verification\n\n The certification body is accredited by:\n\nAccredia (N000

In [20]:
# print() renders the Markdown line breaks in the answer text.
print(answer.content)

In your portfolio, the following pallets have a third-party verified EPD:

1. **Logypal 1**: Made of 100% recycled plastic, classified as distribution packaging.
2. **CPR Pallets**: Designed for freight transport, made from high-density polyethylene (PEHD), and are washable and rust-proof.

These EPDs are independently verified according to ISO 14025:2006. For more details, you can check the EPD programme at [environdec.com](http://www.environdec.com).


In [21]:
# Record the second turn in the history.
# In-place extend: re-running this cell appends the turn a second time.
conversation += [query_as_message, answer]

## Test Memory

In [22]:
# Memory check: the question can only be answered from the history passed in.
# In the recorded run the log shows an answer with no tool calls.
query = "Summarize the conversation (some words per message)?"
query_as_message = LLMMessage(content=query, role=Roles.USER)
# `conversation` now holds the system message plus the two previous turns.
query_with_context = QueryWithContext(query=query, history=conversation)

answer = await main_agent.answer(query_with_context)

2026-02-26 17:59:10.099 | DEBUG    | conversational_toolkit.agents.tool_agent:answer_stream:106 - [{'content': "1. **User Inquiry**: Asked about Einstein's theory of relativity in physics.\n2. **Assistant Response**: Explained special and general relativity, covering key concepts like time dilation, length contraction, and gravity as spacetime curvature.\n3. **User Inquiry**: Asked which pallets in their portfolio have a third-party verified EPD.\n4. **Assistant Response**: Listed pallets with verified EPD: Logypal 1 and CPR Pallets, mentioning their materials and EPD verification details.", 'tool_calls': [], 'role': <Roles.ASSISTANT: 'assistant'>, 'function_name': 'llm'}]


In [23]:
# print() renders the numbered summary with real line breaks.
print(answer.content)

1. **User Inquiry**: Asked about Einstein's theory of relativity in physics.
2. **Assistant Response**: Explained special and general relativity, covering key concepts like time dilation, length contraction, and gravity as spacetime curvature.
3. **User Inquiry**: Asked which pallets in their portfolio have a third-party verified EPD.
4. **Assistant Response**: Listed pallets with verified EPD: Logypal 1 and CPR Pallets, mentioning their materials and EPD verification details.


----------------