In [2]:
import os

from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from smolagents import Tool, CodeAgent, InferenceClientModel

In [15]:
class engineer_advisor_tool(Tool):
    """Agent tool that retrieves canned engineering advice with a BM25 retriever.

    Each indexed document is expected to carry the problem statement in
    ``page_content`` and the keys ``solutions``, ``tools`` and ``emergency``
    in ``metadata``. Missing keys are rendered as ``n/a`` instead of failing
    the whole tool call.

    Args:
        docs: Documents to index with ``BM25Retriever.from_documents``.
        k: Number of documents the retriever is configured to return.
        **kwargs: Forwarded unchanged to ``Tool.__init__``.
    """

    name = "An_engineering_advisor_tool"
    # The retriever below is BM25 (keyword ranking), not an embedding search,
    # so the description shown to the agent says so.
    description = "Uses BM25 keyword search to retrieve relevant engineering solutions for technical problems."
    inputs = {
        "query": {
            "type": "string",
            "description": "A query related to engineering advice. Must be technical",
        }
    }
    output_type = "string"

    def __init__(self, docs, *, k=5, **kwargs):
        super().__init__(**kwargs)
        self.retriever = BM25Retriever.from_documents(docs, k=k)

    def forward(self, query: str) -> str:
        """Return a formatted text listing of the documents retrieved for ``query``.

        Args:
            query: Search text passed to the retriever.

        Returns:
            A string starting with an "Engineering solutions" header followed
            by one numbered section per retrieved document.

        Raises:
            AssertionError: If ``query`` is not a string.
        """
        assert isinstance(query, str), "Your search query must be a string"

        docs = self.retriever.invoke(query)
        return "\nEngineering solutions:\n" + "".join(
            self._format_solution(index, doc) for index, doc in enumerate(docs)
        )

    @staticmethod
    def _join_field(value) -> str:
        """Render one metadata value as display text."""
        if value is None:
            return "n/a"
        if isinstance(value, str):
            # A bare string must not go through join(), which would split it
            # into individual characters.
            return value
        if isinstance(value, (list, tuple, set, frozenset)):
            return ", ".join(str(item) for item in value)
        return str(value)

    @classmethod
    def _format_solution(cls, index, doc) -> str:
        """Format a single retrieved document as a numbered section."""
        metadata = doc.metadata or {}
        return (
            f"\n\n===== Solution {str(index)} =====\n"
            f"Problem: {doc.page_content}\n"
            f"Solutions: {cls._join_field(metadata.get('solutions'))}\n"
            f"Tools: {cls._join_field(metadata.get('tools'))}\n"
            f"Emergency: {cls._join_field(metadata.get('emergency'))}"
        )


In [17]:
# Small hand-written knowledge base: one record per problem, with its
# suggested fixes, diagnostic tools and an emergency action.
data = [
    dict(
        problem="database slow",
        solutions=["add indexes", "use connection pooling", "add more RAM"],
        tools=["EXPLAIN command", "pg_stat_statements"],
        emergency="kill slow queries",
    ),
    dict(
        problem="api crashing",
        solutions=["add error handling", "check memory usage", "add logging"],
        tools=["debugger", "logs", "monitoring"],
        emergency="restart service",
    ),
    dict(
        problem="website down",
        solutions=["check server status", "look at error logs", "restart web server"],
        tools=["ping", "curl", "systemctl"],
        emergency="restart everything",
    ),
    dict(
        problem="memory full",
        solutions=["clear cache", "restart service", "add swap space"],
        tools=["htop", "free -m", "docker system prune"],
        emergency="reboot server",
    ),
]

# The problem text becomes the document body; the remaining fields travel
# along as metadata.
source_docs = [
    Document(
        page_content=entry["problem"],
        metadata={field: entry[field] for field in ("solutions", "tools", "emergency")},
    )
    for entry in data
]

# Recursive character splitter: chunks of up to 500 characters with a
# 50-character overlap, trying the separators in the order listed.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],
    chunk_size=500,
    chunk_overlap=50,
    strip_whitespace=True,
    add_start_index=True,
)

# Chunked copies of the source documents; these are what gets indexed.
processed_docs = text_splitter.split_documents(source_docs)

# Build the retrieval tool over the chunked documents.
engineer_advisor = engineer_advisor_tool(processed_docs)

# The Hugging Face token is read from the HF_TOKEN environment variable so no
# credential is stored in the notebook. If the variable is unset, None is
# passed through.
# NOTE(review): confirm the client falls back to a cached login when token is None.
agent = CodeAgent(
    model=InferenceClientModel(
        model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
        token=os.environ.get("HF_TOKEN"),
    ),
    tools=[engineer_advisor],
)

# Last expression of the cell, so the agent's answer is shown as the cell output.
agent.run("my database has been slow ever since my mysql had become corrupted. what can i do to fix it?")
    



'To repair a corrupted MySQL database, try dumping the database, stopping the MySQL service, running `mysqlcheck`, and restarting the MySQL service. You can use the `mysqlcheck` command directly or automate the process using a Python script.'