In [3]:
import requests
from langchain.retrievers import WikipediaRetriever
from langchain.tools import BaseTool
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from pydantic import BaseModel, Field
from typing import Type
from bs4 import BeautifulSoup
from langchain.schema import SystemMessage

# Initialize LLM
llm = ChatOpenAI(
    temperature=0.1,  # low sampling temperature for the research agent
    model_name="gpt-4o-mini",
)

# Define Argument Schema for Tools
class SearchToolArgsSchema(BaseModel):
    # Single-field input schema shared by the two search tools
    # (WikiSearchTool and DuckDuckGoSearchTool).
    query: str = Field(
        description="The query you will search for",
    )

class URLArgsSchema(BaseModel):
    # Single-field input schema for WebScraperTool.
    url: str = Field(
        description="The URL of the webpage to scrape",
    )

class SaveFileArgsSchema(BaseModel):
    # Single-field input schema for SaveResearchTool.
    content: str = Field(
        description="The research content to save into a .txt file",
    )

# 📌 Wikipedia Search Tool
class WikiSearchTool(BaseTool):
    # Agent tool that looks a query up on Wikipedia and returns the page text.
    name: str = "WikiSearchTool"
    description: str = "Search Wikipedia for relevant information and extract the text from the page."

    args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

    def _run(self, query: str):
        """Return the text of the Wikipedia pages matching ``query``.

        Args:
            query: Search string forwarded to ``WikipediaRetriever``.

        Returns:
            The ``page_content`` of every retrieved document joined by blank
            lines, a fixed "not found" message when nothing is retrieved, or
            an error message when the lookup itself fails.
        """
        try:
            documents = WikipediaRetriever().get_relevant_documents(query)
        except Exception as e:
            # Report the failure as the tool's observation (as WebScraperTool
            # and SaveResearchTool do) instead of aborting the whole agent run.
            return f"Error searching Wikipedia: {str(e)}"

        if not documents:
            return "No relevant Wikipedia pages found."
        return "\n\n".join(doc.page_content for doc in documents)

# 📌 DuckDuckGo Search Tool
class DuckDuckGoSearchTool(BaseTool):
    # Agent tool that runs a web search through DuckDuckGo.
    name: str = "DuckDuckGoSearchTool"
    description: str = "Use DuckDuckGo to search the web for relevant links."

    args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

    def _run(self, query: str):
        """Search DuckDuckGo for ``query``.

        Args:
            query: Search string forwarded to ``DuckDuckGoSearchAPIWrapper.run``.

        Returns:
            Whatever the wrapper's ``run`` returns for the query, or an error
            message when the search fails.
        """
        try:
            return DuckDuckGoSearchAPIWrapper().run(query)
        except Exception as e:
            # Report the failure as the tool's observation (as WebScraperTool
            # and SaveResearchTool do) instead of aborting the whole agent run.
            return f"Error searching DuckDuckGo: {str(e)}"

# 📌 Web Scraping Tool
class WebScraperTool(BaseTool):
    # Agent tool that downloads a webpage and returns its visible text.
    name: str = "WebScraperTool"
    description: str = "Extract the text content of a given webpage."

    args_schema: Type[URLArgsSchema] = URLArgsSchema

    def _run(self, url: str):
        """Fetch ``url`` and return the first 2000 characters of its text.

        Args:
            url: Address of the page to download.

        Returns:
            The page text (script/style content and blank lines removed),
            truncated to 2000 characters, or an error message when the
            download or parsing fails.
        """
        timeout_seconds = 10
        max_chars = 2000  # Limit output to 2000 characters
        try:
            # requests has no default timeout; without one a stalled server
            # would block the agent indefinitely.
            response = requests.get(url, timeout=timeout_seconds)
            # Surface 4xx/5xx responses as errors instead of scraping the
            # server's error page as if it were content.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")
            # Inline JavaScript/CSS is not page content and would otherwise
            # consume most of the character budget.
            for tag in soup(["script", "style", "noscript"]):
                tag.decompose()

            # strip=True trims each text fragment and drops empty ones, so
            # the truncated output is not dominated by blank lines.
            text = soup.get_text(separator="\n", strip=True)
            return text[:max_chars]
        except Exception as e:
            return f"Error scraping website: {str(e)}"

# 📌 Save Research to File Tool
class SaveResearchTool(BaseTool):
    # Agent tool that writes the collected research to research.txt.
    name: str = "SaveResearchTool"
    description: str = "Save the research content into a .txt file."

    args_schema: Type[SaveFileArgsSchema] = SaveFileArgsSchema

    def _run(self, content: str):
        """Write ``content`` to ``research.txt``, replacing any previous file.

        Returns a confirmation message on success, or an error message
        describing the exception when the write fails.
        """
        try:
            with open("research.txt", mode="w", encoding="utf-8") as out_file:
                out_file.write(content)
        except Exception as err:
            return f"Error saving file: {err!s}"
        else:
            return "Research saved to research.txt"

# 📌 Initialize Agent
# Research workflow instructions for the agent.
#
# The zero-shot ReAct agent assembles its prompt from `prefix`, the tool
# descriptions, `format_instructions` and `suffix`. It has no `system_message`
# parameter (that keyword belongs to the OpenAI-functions agent), so the
# instructions are supplied as the prefix. The prefix must end by introducing
# the tool list, because the tool descriptions are appended right after it.
_RESEARCH_PREFIX = (
    "You are an AI research assistant that gathers information using multiple sources.\n"
    "\n"
    "1. Choose the search tool DuckDuckGoSearchTool or WikiSearchTool randomly to find "
    "the latest and most relevant information from the web.\n"
    "2. If you just used Wikipedia, then next time use DuckDuckGo, and vice versa.\n"
    "   (For example, if the previous search was DuckDuckGo, the next search must be Wikipedia.)\n"
    "3. If a website is found in DuckDuckGo, extract and analyze the content.\n"
    "4. Ensure that no search tool is used twice in a row unless absolutely necessary.\n"
    "5. Summarize the research findings after both sources have been queried.\n"
    "6. If enough useful information has been collected, finalize by saving the research "
    "into a `.txt` file.\n"
    "\n"
    "Follow these steps to ensure a diverse and comprehensive search process.\n"
    "\n"
    "You have access to the following tools:"
)

agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    # Feed malformed LLM output back to the model instead of raising.
    handle_parsing_errors=True,
    tools=[
        DuckDuckGoSearchTool(),
        WikiSearchTool(),
        WebScraperTool(),
        SaveResearchTool(),
    ],
    agent_kwargs={
        "prefix": _RESEARCH_PREFIX,
    },
)

# 📌 Run the agent with the required query
query = "Research about the XZ backdoor"

# Hand the research topic to the agent and keep whatever invoke() returns.
result = agent.invoke(query)

# Show the agent's result in the cell output
print(result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to gather information about the XZ backdoor, which is likely a type of malware or security vulnerability. I will start by searching for relevant information on Wikipedia to get a comprehensive overview. 

Action: WikiSearchTool  
Action Input: "XZ backdoor"  [0m
Observation: [33;1m[1;3mIn February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production sy