# RAG as tool

# Setup

In [1]:
import json
from typing import Any


from conversational_toolkit.llms.base import LLMMessage, Roles
from conversational_toolkit.tools.base import Tool
from conversational_toolkit.chunking.base import Chunk
from conversational_toolkit.llms.openai import OpenAILLM
from conversational_toolkit.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from conversational_toolkit.retriever.vectorstore_retriever import VectorStoreRetriever

from sme_kt_zh_collaboration_rag.feature0_baseline_rag import (
    load_chunks,
    build_llm,
    build_vector_store,
    VS_PATH,
    EMBEDDING_MODEL,
)

Consider using the pymupdf_layout package for a greatly improved page layout analysis.


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load document chunks (capped with max_files=5) and create the embedding model.
chunks = load_chunks(max_files=5)
embedding_model = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)
# Top-level `await` works because the notebook kernel runs cells inside an event loop.
# NOTE(review): reset=False presumably reuses an existing store at VS_PATH - confirm.
db_chroma = await build_vector_store(
    chunks, embedding_model, db_path=VS_PATH, reset=False
)
# NOTE: despite its name, `vector_store` is a VectorStoreRetriever wrapping `db_chroma`;
# it is the object handed to the retrieval tool further down.
vector_store = VectorStoreRetriever(embedding_model, db_chroma, top_k=5)

2026-02-26 17:44:01.693 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:202 - Chunking 5 files from C:\Users\sieverin\SDSC\Code\sme-kt-zh-collaboration-rag\data


5


2026-02-26 17:44:01.962 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_internal_procurement_policy.pdf: 12 chunks
2026-02-26 17:44:02.169 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_logylight_incomplete_datasheet.pdf: 6 chunks
2026-02-26 17:44:02.290 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_product_catalog.pdf: 7 chunks
2026-02-26 17:44:02.323 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_product_overview.xlsx: 1 chunks
2026-02-26 17:44:02.434 | DEBUG    | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:214 -   ART_relicyc_logypal1_datasheet_2021.pdf: 5 chunks
2026-02-26 17:44:02.434 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:load_chunks:218 - Done, 31 chunks total
2026-02-26 17:44:04.741 | DEBUG    | conversational_toolkit.embeddings.sentence_transformer:__init__:57 - Sentence Transfo

In [3]:
# Build the baseline LLM using the OpenAI backend.
# NOTE(review): `llm` is rebound to a tool-enabled OpenAILLM in a later cell before
# any generate() call, so this instance is not used by the cells shown here.
llm = build_llm(backend="openai")

2026-02-26 17:44:05.965 | INFO     | sme_kt_zh_collaboration_rag.feature0_baseline_rag:build_llm:137 - LLM backend: OpenAI (gpt-4o-mini)
2026-02-26 17:44:06.178 | DEBUG    | conversational_toolkit.llms.openai:__init__:63 - OpenAI LLM loaded: gpt-4o-mini; temperature: 0.3; seed: 42; tools: None; tool_choice: None; response_format: {'type': 'text'}


# Create the tool

First, let's create a tool that, when called, returns the relevant chunks to the LLM.

In [4]:
def chunks_to_text(chunks: list[Chunk]) -> str:
    """Render retrieved chunks as a single Markdown-style string for the LLM.

    Each chunk becomes a ``## Chunk <title>:`` heading followed by its content in
    a fenced block. Consecutive chunks are separated by a 30-dash rule and a
    blank line.

    Args:
        chunks: Chunks to render; only ``title`` and ``content`` are read.

    Returns:
        The rendered text, or an empty string when ``chunks`` is empty. Nothing
        follows the last chunk's closing fence.
    """
    separator = "\n" + "-" * 30 + "\n\n"

    # Joining on the separator (instead of appending it after every chunk and
    # slicing a fixed number of characters off the end) guarantees the text never
    # ends with a partial, dangling rule.
    return separator.join(
        f"## Chunk {chunk.title}:\n```\n{chunk.content}\n```" for chunk in chunks
    )

In [5]:
class RetrieveRelevantChunks(Tool):
    """Tool that looks up chunks for a query and returns them as formatted text."""

    def __init__(
        self, name: str, description: str, parameters: dict[str, Any], retriever
    ):
        """Store the tool metadata and the retriever used to answer calls.

        Args:
            name: Name of the tool.
            description: Description of the tool.
            parameters: Schema describing the arguments the tool accepts.
            retriever: Object exposing an awaitable ``retrieve(query)`` method.
        """
        self.name = name
        self.description = description
        self.parameters = parameters
        self.retriever = retriever

    async def call(self, args: dict[str, Any]) -> dict[str, Any]:
        """Retrieve chunks for ``args["query"]`` and format them as text.

        Args:
            args: Tool arguments; the ``"query"`` entry is passed to the retriever
                (``None`` is passed when the key is absent).

        Returns:
            ``{"result": [text]}`` where ``text`` is the formatted rendering of the
            retrieved chunks; the list always holds exactly one element.
        """
        search_query = args.get("query")
        matching_chunks = await self.retriever.retrieve(search_query)

        return {"result": [chunks_to_text(matching_chunks)]}

In [6]:
# Schema of the arguments the tool expects: a single required string, `query`.
retriever_tool_parameters = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "The query to retrieve relevant chunks for.",
        },
    },
    "required": ["query"],
    "additionalProperties": False,
}

retriever_tool = RetrieveRelevantChunks(
    name="retrieve_relevant_chunks",
    description="Retrieves the most relevant chunks based on a query.",
    parameters=retriever_tool_parameters,
    retriever=vector_store,
)

In [7]:
# Test if it works: call the tool directly (no LLM involved) with a sample question
result = await retriever_tool.call(
    {"query": "Which pallets in our portfolio have a third-party verified EPD?"}
)

# `result["result"]` is a one-element list holding the formatted chunk text
print(result["result"])

2026-02-26 17:44:06.259 | DEBUG    | conversational_toolkit.embeddings.sentence_transformer:get_embeddings:76 - sentence-transformers/all-MiniLM-L6-v2 embeddings size: (1, 384)


['## Chunk ### THIRD-PARTY VERIFICATION:\n```\n### THIRD-PARTY VERIFICATION\n\nIndependent third-party verification of the declaration and data,\n\naccording to ISO 14025:2006, via:\n\n [X] EPD verification by accredited certification body\n\n Third-party verification: Bureau Veritas Italia S.p.A. is an approved certification\n\nbody accountable for the third-party verification\n\n The certification body is accredited by:\n\nAccredia (N0009PRD)\n\n Procedure for follow-up of data during EPD validity involves third-party verifier:\n\n [X] Yes [ ] No\n\n An EPD should provide current information, and may be updated if conditions\n\nchange. The stated validity is therefore subject to the continued registration\n\nand publication at www.environdec.com.\n\n EPDs within the same product category but from different programmes may not\n\nbe comparable.\n\n EPD owner has the sole ownership, liability and responsibility of the EPD.\n\nThe environmental impacts of different EPDs can be compared o

# Provide the tool to the LLM

In [8]:
# System prompt: advertises the retrieval tool and sets the answering rules that
# apply once the tool has been used.
system_prompt = """You are a helpful assistant that answers questions.

You have access to the following tool:
- retrieve_relevant_chunks: Retrieves the most relevant chunks based on a query.

Only use the tool if it's relevant, else answer based on your own knowledge. Always try to use the tool if you think it can help you answer the question better.

If you use the tool, follow these guidelines:
- Use the chunks as your only source of truth. Do not rely on outside knowledge.
- Use all relevant chunks when forming your answer. Do not ignore any provided information.
- If the answer cannot be found in the chunks, clearly say that you do not know.
- Keep your answer concise and focused, without unnecessary details.
- Cite your sources from the provided chunks."""

# Wrap the prompt as a SYSTEM-role message so it can be placed in a conversation.
prompt_message = LLMMessage(content=system_prompt, role=Roles.SYSTEM)

# NOTE(review): `prompt_template` is not referenced by any other cell shown in this notebook.
prompt_template = """# User question:\n{question}\n\nYour answer:\n\n"""

In [9]:
# Rebind `llm` to a client that is given the retrieval tool; tool_choice="auto"
# leaves the decision whether to call the tool to the model.
llm = OpenAILLM(tools=[retriever_tool], tool_choice="auto")

2026-02-26 17:44:06.691 | DEBUG    | conversational_toolkit.llms.openai:__init__:63 - OpenAI LLM loaded: gpt-4o-mini; temperature: 0.5; seed: 42; tools: [<__main__.RetrieveRelevantChunks object at 0x000001C30C047BF0>]; tool_choice: auto; response_format: {'type': 'text'}


# Test the tool

## General Question

In [10]:
# A general-knowledge question that is unrelated to the indexed documents.
query = "What is Einstein's theory of relativity? Answer concisely in 2-3 sentences."
user_message = LLMMessage(role=Roles.USER, content=query)

# Single turn: system prompt followed by the user question.
response = await llm.generate(conversation=[prompt_message, user_message])

2026-02-26 17:44:13.412 | DEBUG    | conversational_toolkit.llms.openai:generate:87 - Completion: ChatCompletion(id='chatcmpl-DDYl9GYx4N1out3LWESJzVdYr9yVz', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Einstein's theory of relativity consists of two parts: special relativity and general relativity. Special relativity, proposed in 1905, establishes that the laws of physics are the same for all non-accelerating observers and introduces the concept that the speed of light is constant in a vacuum. General relativity, published in 1915, extends this idea to include gravity, describing it as the curvature of spacetime caused by mass.", refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1772124251, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_3ee6fe3e89', usage=CompletionUsage(completion_tokens=91, prompt_tokens=230, tota

In [11]:
# Here the LLM should not call the tool, as it can answer based on its own knowledge.
# In the recorded run the reply has text in `content` and an empty `tool_calls` list.
response

LLMMessage(content="Einstein's theory of relativity consists of two parts: special relativity and general relativity. Special relativity, proposed in 1905, establishes that the laws of physics are the same for all non-accelerating observers and introduces the concept that the speed of light is constant in a vacuum. General relativity, published in 1915, extends this idea to include gravity, describing it as the curvature of spacetime caused by mass.", role=<Roles.ASSISTANT: 'assistant'>, tool_calls=[], tool_call_id=None, name=None)

## Domain-Specific Question

In [12]:
# Let's ask a question about PrimePack AG, i.e. one that concerns the indexed documents
query = "Which pallets in our portfolio have a third-party verified EPD?"
user_message = LLMMessage(role=Roles.USER, content=query)

# First turn only: the reply may be a tool-call request instead of a final answer.
response = await llm.generate(conversation=[prompt_message, user_message])

2026-02-26 17:44:14.338 | DEBUG    | conversational_toolkit.llms.openai:generate:87 - Completion: ChatCompletion(id='chatcmpl-DDYlBpODCT1Vpd2NhOXUB5DtGK6xq', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=[ChatCompletionMessageFunctionToolCall(id='call_5P1l117kIvfuCy9nzpqtmuPy', function=Function(arguments='{"query":"third-party verified EPD pallets portfolio"}', name='retrieve_relevant_chunks'), type='function')]))], created=1772124253, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_373a14eb6f', usage=CompletionUsage(completion_tokens=22, prompt_tokens=223, total_tokens=245, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0

In [13]:
# Here the LLM indeed calls the tool: in the recorded run `content` is empty and
# `tool_calls` holds one call to retrieve_relevant_chunks.
# Note that the query is not the same, the LLM rewrote it
response

LLMMessage(content='', role=<Roles.ASSISTANT: 'assistant'>, tool_calls=[ToolCall(id='call_5P1l117kIvfuCy9nzpqtmuPy', function=Function(name='retrieve_relevant_chunks', arguments='{"query":"third-party verified EPD pallets portfolio"}'), type='function')], tool_call_id=None, name=None)

In [14]:
results = {}

for tool_call in response.tool_calls:
    tool_name = tool_call.function.name
    tool_args = tool_call.function.arguments

    tool = next((t for t in llm.tools if t.name == tool_name), None)

    if tool is not None:
        tools_args_json = json.loads(tool_args)
        tool_result = await tool.call(tools_args_json)

        results[tool_name] = tool_result

print(results["retrieve_relevant_chunks"]["result"])

2026-02-26 17:44:14.381 | DEBUG    | conversational_toolkit.embeddings.sentence_transformer:get_embeddings:76 - sentence-transformers/all-MiniLM-L6-v2 embeddings size: (1, 384)


['## Chunk ## Q1: portfolio_scope:\n```\n## Q1: portfolio_scope\n\n**Question:** Does PrimePack AG offer a product called the "Lara Pallet"?\n\n**Expected answer:** No. The Lara Pallet is not part of PrimePack AG\'s portfolio. The product catalog explicitly lists it under products that are *not* offered. The active pallet portfolio consists of: Noé Pallet (32-100, CPR System), Wooden Pallet 1208 (32-101, CPR System), Recycled Plastic Pallet (32-102, CPR System), Logypal 1 (32-103, Relicyc), LogyLight (32-104, Relicyc), and EP 08 (32-105, StabilPlastik). If a customer asks about the Lara Pallet, the correct response is to refer them to the current product catalog.\n\n**Primary source:** `data/artificial_markdown/ART_product_catalog.md`\n\n**Failure mode to watch:** The system invents a description for the Lara Pallet — e.g. by extrapolating from other pallet documents — instead of stating the product does not exist.\n\n---\n\n\n```\n------------------------------\n\n## Chunk ### THIRD-P

In [15]:
# Wrap each tool output in a TOOL-role message tied to the id of the call that
# requested it, serialising the output dict as JSON.
tools_answers = [
    LLMMessage(
        role=Roles.TOOL,
        name=tool_call.function.name,
        content=json.dumps(results[tool_call.function.name]),
        tool_call_id=tool_call.id,
    )
    for tool_call in response.tool_calls
]

In [None]:
conversation = [user_message, response, *tools_answers]

final_response = await llm.generate(conversation)

2026-02-26 17:44:18.218 | DEBUG    | conversational_toolkit.llms.openai:generate:87 - Completion: ChatCompletion(id='chatcmpl-DDYlCtIoqEA9TSuKYaS3naK8cW0V6', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The pallets in your portfolio that have a third-party verified Environmental Product Declaration (EPD) are:\n\n1. **Logypal 1** - This pallet is classified as distribution packaging and is made of 100% recycled plastic content, which is post-consumer plastic waste. \n\nThe third-party verification is conducted by Bureau Veritas Italia S.p.A., and the EPD is compliant with ISO 14025:2006 standards. \n\nIf you need more specific details or additional pallets, please let me know!', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1772124254, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_373a14eb6f', usage=CompletionUsa

In [17]:
# Show the text of the answer generated after the tool output was sent back to the model.
print(final_response.content)

The pallets in your portfolio that have a third-party verified Environmental Product Declaration (EPD) are:

1. **Logypal 1** - This pallet is classified as distribution packaging and is made of 100% recycled plastic content, which is post-consumer plastic waste. 

The third-party verification is conducted by Bureau Veritas Italia S.p.A., and the EPD is compliant with ISO 14025:2006 standards. 

If you need more specific details or additional pallets, please let me know!


--------------------------