In [10]:
from langchain_core.tools import tool
from pyprojroot import here
from langchain_chroma import Chroma
from langchain.embeddings import GPT4AllEmbeddings
from accord.utils.common import get_config
from accord.utils.file_loader import extract_text_content
from glob import glob
import os

class RAGTool:
    """Registry of retrieval (RAG) tools, one per persisted Chroma vector store.

    Instantiating the class (re)builds two class-level registries:

    * ``RAGTool.retriever`` -- maps each vector-store directory name to a
      similarity-threshold retriever over that store.
    * ``RAGTool.tools`` -- one LangChain tool per retriever whose data folder
      contains an info document; that document's text is the tool description.

    Both registries live on the class, so they are read as ``RAGTool.tools`` /
    ``RAGTool.retriever`` and are shared by every instance.
    """

    # Minimum relevance score a document needs in order to be returned.
    SCORE_THRESHOLD = 0.4

    retriever = {}
    tools = []

    def __init__(self) -> None:
        """Load config and the embedding model, then rebuild both registries."""
        # No per-instance ``retriever`` dict here: it would shadow the
        # class-level registry that every method (and every tool) actually uses.
        self.config = get_config(f"{here()}/configs/tools_config.yaml")
        self.embedding_model = GPT4AllEmbeddings()
        self.create_retriever()
        self.create_tools()

    @staticmethod
    def _tool_name(key):
        """Derive a tool name from a vector-store directory name.

        Spaces and hyphens become underscores. If the result is still not a
        valid identifier, every remaining non-alphanumeric character is
        replaced by ``_`` and a leading ``_`` is added when the name is empty
        or starts with a digit.
        """
        name = key.replace(" ", "_").replace("-", "_")
        if not name.isidentifier():
            name = "".join(ch if (ch.isalnum() or ch == "_") else "_" for ch in name)
            if not name or name[0].isdigit():
                name = f"_{name}"
        return name

    @staticmethod
    def _build_tool(key, description):
        """Create the LangChain tool that queries ``RAGTool.retriever[key]``.

        The tool is an ordinary closure rather than ``exec``-generated source,
        so ``key`` and ``description`` are used verbatim: no escaping is needed
        and nothing in them can be interpreted as code.
        """

        def search(query: str) -> str:
            # Look the retriever up at call time so that a rebuilt registry
            # is picked up by tools that were created earlier.
            relevant_docs = RAGTool.retriever[key].invoke(query)
            return "\n\n".join(doc.page_content for doc in relevant_docs)

        # The ``tool`` decorator takes the tool name from ``__name__`` and the
        # description from the docstring.
        search.__name__ = RAGTool._tool_name(key)
        search.__qualname__ = search.__name__
        search.__doc__ = description
        return tool(search)

    def create_tools(self):
        """Rebuild ``RAGTool.tools`` from the registered retrievers.

        For every key in ``RAGTool.retriever`` the info document at
        ``<project root>/<root_dir>/<key>/<info_doc_name>`` is loaded. Keys
        whose info document is missing, yields no content, or is blank are
        skipped, because a tool cannot be created without a description.
        """
        data_cfg = self.config.data_embedding
        RAGTool.tools = []
        for key in RAGTool.retriever:
            info_file_path = os.path.join(
                here(), data_cfg.root_dir, key, data_cfg.info_doc_name
            )
            if not os.path.exists(info_file_path):
                continue
            tool_doc = extract_text_content(info_file_path)
            if not tool_doc:
                continue
            description = tool_doc[0].page_content
            if not description or not description.strip():
                continue
            RAGTool.tools.append(self._build_tool(key, description))

    def create_retriever(self):
        """Rebuild ``RAGTool.retriever`` from the persisted vector stores.

        Every sub-directory of ``<project root>/<vectordb_dir>`` is opened as a
        Chroma store and registered under its directory name. The registry is
        reset first so that stores removed from disk do not linger.
        """
        data_cfg = self.config.data_embedding
        RAGTool.retriever = {}
        for db_path in glob(f"{here()}/{data_cfg.vectordb_dir}/*"):
            # Only directories can be persisted stores; skip stray files.
            if not os.path.isdir(db_path):
                continue
            vectordb = Chroma(
                collection_name=data_cfg.collection_name,
                persist_directory=db_path,
                embedding_function=self.embedding_model,
            )
            RAGTool.retriever[os.path.basename(db_path)] = vectordb.as_retriever(
                search_type="similarity_score_threshold",
                search_kwargs={
                    "k": data_cfg.k,
                    "score_threshold": self.SCORE_THRESHOLD,
                },
            )

In [13]:
# Initialize and generate tools.
# The instance itself is not kept: __init__ runs create_retriever() and
# create_tools(), which fill the class-level RAGTool.retriever and
# RAGTool.tools registries.
RAGTool()

<__main__.RAGTool at 0x75c411ffe840>

In [14]:
# Display the class-level list of tools that create_tools() generated.
RAGTool.tools

[StructuredTool(name='test_doc', description='Search about Bangladesh and its details.\nAlso have some information git repo RAG-DeepSeek-r1', args_schema=<class 'langchain_core.utils.pydantic.test_doc'>, func=<function test_doc at 0x75c40906a5c0>),
 StructuredTool(name='swiss_airline_policy', description='Consult the company policies to check whether certain options are permitted.', args_schema=<class 'langchain_core.utils.pydantic.swiss_airline_policy'>, func=<function swiss_airline_policy at 0x75c40906bba0>),
 StructuredTool(name='stories', description='Search among the fictional stories and find the answer to the query. Input should be the query.', args_schema=<class 'langchain_core.utils.pydantic.stories'>, func=<function stories at 0x75c40906ae80>)]

In [None]:
query = "population of bangladesh"

# Select the tool by name rather than by position: the list order follows the
# filesystem glob over the vector-store directory and is not guaranteed.
tools_by_name = {rag_tool.name: rag_tool for rag_tool in RAGTool.tools}

# Use .invoke(); calling a tool object directly (tool(query)) is deprecated.
response = tools_by_name["test_doc"].invoke(query)

print(response)