In [None]:
import os
from abc import ABC, abstractmethod
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.tools import Tool
from langchain.tools.retriever import create_retriever_tool
from langchain.schema import Document
from dotenv import load_dotenv

load_dotenv()


In [None]:
def summarize_documents(docs: list["Document"]) -> str:
    """
    Summarize the given documents in one to two sentences.

    Args:
        docs: Documents whose ``page_content`` values are joined with
            newlines and sent to the chat model as a single prompt.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when ``docs`` is empty or contains only blank text.
    """
    all_text = "\n".join(doc.page_content for doc in docs)

    # Nothing to summarize: skip the model call entirely. Sending an empty
    # prompt body would only invite a made-up "summary".
    if not all_text.strip():
        return ""

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    prompt = f"Please summarize the following text in 1-2 sentences:\n---\n{all_text}\n---"
    summary = llm.invoke(prompt)
    return summary.content.strip()


In [None]:
class IVectorStoreObserver(ABC):
    """
    Interface for every observer that wants to be notified as soon as a
    vector store receives new documents or is otherwise updated.
    """

    @abstractmethod
    def on_vectorstore_update(self, manager: "SingleVectorStoreManager"):
        """Notification hook; ``manager`` is the object passed by the notifier."""


In [None]:
class SingleVectorStoreManager:
    def __init__(self, persist_dir: str):
        self.embedding_function = OpenAIEmbeddings()
        self.persist_dir = persist_dir

        collection_name = os.path.basename(persist_dir)
        self.vs = Chroma(
            collection_name=collection_name,
            embedding_function=self.embedding_function,
            persist_directory=self.persist_dir
        )

        self.description = "Dieser Vectorstore ist leer."

        self.observers: list[IVectorStoreObserver] = []

    def add_observer(self, observer: IVectorStoreObserver):
        self.observers.append(observer)

    def remove_observer(self, observer: IVectorStoreObserver):
        if observer in self.observers:
            self.observers.remove(observer)

    def notify_observers(self):
        for obs in self.observers:
            obs.on_vectorstore_update(self)

    def is_empty(self) -> bool:
        return (self.vs._collection.count() == 0)

    def create_retriever_tool(self, name: str, custom_description: str | None = None) -> Tool:

        retriever = self.vs.as_retriever()
        desc = custom_description if custom_description else self.description
        if self.is_empty():
            desc += "\n(Hinweis: Dieser Vectorstore ist aktuell leer.)"

        tool = create_retriever_tool(
            retriever=retriever,
            name=name,
            description=desc
        )
        return tool

    def add_documents(self, docs: list[Document], update_description: bool = True):

        self.vs.add_documents(docs)
        if update_description:
            summary_text = summarize_documents(docs)
            if self.is_empty():
                pass
            self.description = summary_text
        self.notify_observers()

In [None]:
from langchain_core.tools import Tool
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage

class LLMToolBinder:
    def __init__(self, llm_with_tools: ChatOpenAI, managers: list["SingleVectorStoreManager"], extra_tools: list[Tool] | None = None):
        self.llm_with_tools = llm_with_tools
        self.llm_no_tools = ChatOpenAI(model="gpt-4o-mini", temperature=0)
        self.managers = managers
        self.extra_tools = extra_tools or []
        self.tools: list[Tool] = []
        self._bind_tools()

    def _bind_tools(self):
        new_tools = []
        for i, m in enumerate(self.managers, start=1):
            tool_name = f"retriever_store{i}"
            new_tools.append(m.create_retriever_tool(name=tool_name))
        new_tools.extend(self.extra_tools)
        self.tools = new_tools
        self.llm_with_tools = self.llm_with_tools.bind_tools(self.tools)

    def on_vectorstore_update(self, manager: "SingleVectorStoreManager"):
        self._bind_tools()

    def invoke_llm(self, query: str) -> str:
        system_prompt = (
            "You are a helpful assistant. You may call the available tools if needed. "
            "Once you receive tool outputs, focus on the last tool message and provide a final user-facing answer."
        )
        messages = [SystemMessage(content=system_prompt), HumanMessage(content=query)]
        first_output = self.llm_with_tools.invoke(messages)
        messages.append(first_output)
        if first_output.tool_calls:
            for tc in first_output.tool_calls:
                tool_name = tc["name"]
                tool_args = tc["args"]
                print(f"Tool chosen: {tool_name} with args={tool_args}")
                found_tool = next((t for t in self.tools if t.name.lower() == tool_name.lower()), None)
                if not found_tool:
                    tool_result = f"No matching tool named '{tool_name}'."
                else:
                    tool_result = found_tool.invoke(tool_args)
                messages.append(ToolMessage(content=tool_result, tool_call_id=tc["id"]))
            messages.append(SystemMessage(content="Focus on the last tool message. Provide your final answer."))
            second_output = self.llm_no_tools.invoke(messages)
            messages.append(second_output)
            return second_output.content
        else:
            return first_output.content

    def print_all_tool_descriptions(self):
        for tool in self.tools:
            print(tool.name, ":", tool.description)


In [None]:
# Root folder that holds one persistence directory per vector store.
base_dir = "my_chroma_db"
os.makedirs(base_dir, exist_ok=True)

# One manager per store directory, created in order store1 -> store3.
manager1, manager2, manager3 = (
    SingleVectorStoreManager(os.path.join(base_dir, store_name))
    for store_name in ("store1", "store2", "store3")
)


In [None]:
from langchain_core.tools import tool

@tool
def fallback_tool(message: str) -> str:
    """
    A fallback tool if no other tool is appropriate.

    Args:
        message (str): The user query, or any text.

    Returns:
        str: A fallback response for questions that the model cannot answer
             with the other tools.
    """
    # The docstring above is intentionally left unchanged: the @tool decorator
    # uses it as the tool description that is shown to the model.
    #
    # The reply previously used an acute accent (´) in place of the apostrophe
    # and closed a quote that was never opened; quote the message on both sides.
    return f"I don't know how to answer '{message}'"


In [None]:
from langchain_core.tools import tool

def create_vectorstore_info_tool(managers: list["SingleVectorStoreManager"]):
    # Factory: the returned tool closes over `managers`, so it reports on
    # exactly these stores with their state at the time the tool is invoked.
    @tool
    def vectorstore_info(query: str) -> str:
        """
        Use this tool to reveal internal knowledge about the agent, including:
        - The total number of vectorstores,
        - Each vectorstore’s document count,
        - Each vectorstore’s description or summary.
        """
        # `query` is part of the tool signature but is not read here.
        header = f"Total vectorstores: {len(managers)}"
        sections = [
            f"VectorStore {position}: {store.vs._collection.count()} documents\n"
            f"Description: {store.description}"
            for position, store in enumerate(managers, start=1)
        ]
        return "\n\n".join([header, *sections])

    return vectorstore_info


# Introspection tool covering the three managers defined earlier in the notebook.
info_tool = create_vectorstore_info_tool([manager1, manager2, manager3])

In [None]:
# Chat model handed to the binder, which binds the tools onto it.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

binder = LLMToolBinder(
    llm_with_tools=llm,
    managers=[manager1, manager2, manager3],
    extra_tools=[fallback_tool, info_tool],
)

# Subscribe the binder to every manager so it rebuilds its tools on updates.
# NOTE(review): re-running this cell creates a new binder while the previous
# one stays registered on the managers (add_observer only appends).
manager1.add_observer(binder)
manager2.add_observer(binder)
manager3.add_observer(binder)

In [None]:
# Baseline: issued before the Lacarelli document is added further below; the
# same query is repeated afterwards for comparison.
binder.invoke_llm("Where is Lacarelli?")

In [None]:
# Single sample document about the restaurant "Lacarelli".
docs_store1 = [
    Document(
        page_content=(
            "Lacarelli is a charming family-run Italian restaurant nestled in the heart of Berlin. "
            "Its menu features authentic dishes like homemade ravioli, wood-fired pizzas, and creamy tiramisu. "
            "With friendly staff, rustic decor, and a cozy atmosphere, Lacarelli provides an inviting "
            "dining experience for lovers of Italian cuisine and fine wines daily."
        )
    )
]

# Store it in the first vector store and refresh that store's description.
# NOTE(review): no ids are passed, so re-running this cell presumably stores
# the same document again in the persisted collection — confirm.
manager1.add_documents(docs=docs_store1, update_description=True)

In [None]:
# Print each bound tool's name and description; run after add_documents so the
# refreshed description of the first store can be inspected.
binder.print_all_tool_descriptions()

In [None]:
# Same query as the baseline, now after the Lacarelli document was added to
# the first store.
binder.invoke_llm("Where is Lacarelli?")

In [None]:
# Open-ended question; invoke_llm prints which tool (if any) the model chose.
binder.invoke_llm("What do you know?")

In [None]:
# Asks for the number of vector stores, which is what the vectorstore_info
# tool reports.
binder.invoke_llm("How many vectorstores do you manage?")