In [99]:
from typing import Any, Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.document_loaders import WebBaseLoader
from langchain.schema import SystemMessage

# Model configuration kept in named constants so it is easy to spot and tune.
MODEL_NAME = "gpt-4o-mini"
TEMPERATURE = 0.1

# Chat model shared by the agent built later in this notebook.
llm = ChatOpenAI(model_name=MODEL_NAME, temperature=TEMPERATURE)

In [None]:
# Argument schema used by WikipediaSearchTool: one required string, `query`.
# (Documented with comments rather than a docstring so the generated
# pydantic schema stays exactly as before.)
class WikipediaSearchToolArgsSchema(BaseModel):
    query: str = Field(
        ...,
        description="The search query.",
    )


# Argument schema used by DuckDuckGoSearchTool: one required string, `query`.
# (Documented with comments rather than a docstring so the generated
# pydantic schema stays exactly as before.)
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    query: str = Field(
        ...,
        description="The search query.",
    )


# Argument schema used by DuckDuckGoScraperTool: one required string, `url`.
# (Documented with comments rather than a docstring so the generated
# pydantic schema stays exactly as before.)
class DuckDuckGoScraperToolArgsSchema(BaseModel):
    url: str = Field(
        ...,
        description="The URL to scrape.",
    )


# Argument schema used by SaveToFileTool: one required string, `content`.
# (Documented with comments rather than a docstring so the generated
# pydantic schema stays exactly as before.)
class SaveToFileToolArgsSchema(BaseModel):
    content: str = Field(
        ...,
        description="The content to save.",
    )


class WikipediaSearchTool(BaseTool):
    """Agent tool that runs a query against Wikipedia.

    The query is forwarded to ``WikipediaQueryRun`` backed by a
    ``WikipediaAPIWrapper`` created with its default settings.
    """

    # `name` and `description` override fields declared on BaseTool. They are
    # annotated explicitly because pydantic v2-based LangChain releases reject
    # un-annotated overrides of inherited fields at class-creation time.
    name: str = "WikipediaSearch"
    description: str = """
    Use this tool to perform searches on Wikipedia.
    It takes a query as an argument.
    Example query: "Artificial Intelligence"
    """
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query: str) -> Any:
        """Search Wikipedia for ``query`` and return whatever the query runner returns.

        Args:
            query: The search text supplied by the agent.

        Returns:
            The result of ``WikipediaQueryRun.run(query)``.
        """
        wikipedia_api = WikipediaAPIWrapper()
        wikipedia = WikipediaQueryRun(api_wrapper=wikipedia_api)
        return wikipedia.run(query)


class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that runs a web search through DuckDuckGo.

    The query is forwarded to ``DuckDuckGoSearchResults`` created with its
    default settings.
    """

    # `name` and `description` override fields declared on BaseTool. They are
    # annotated explicitly because pydantic v2-based LangChain releases reject
    # un-annotated overrides of inherited fields at class-creation time.
    name: str = "DuckDuckGoSearch"
    description: str = """
    Use this tool to perform web searches using the DuckDuckGo search engine.
    It takes a query as an argument.
    Example query: "Latest technology news"
    """
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query: str) -> Any:
        """Search DuckDuckGo for ``query`` and return the search tool's result.

        Args:
            query: The search text supplied by the agent.

        Returns:
            The result of ``DuckDuckGoSearchResults.run(query)``.
        """
        # NOTE(review): the recorded notebook run ended with
        # "TypeError: DDGS.text() got an unexpected keyword argument
        # 'max_results'". That looks like a version mismatch between
        # langchain_community and the installed duckduckgo_search package --
        # confirm and pin compatible versions.
        search = DuckDuckGoSearchResults()
        return search.run(query)


class DuckDuckGoScraperTool(BaseTool):
    """Agent tool that fetches a web page and returns its text content.

    The page is loaded with ``WebBaseLoader`` and the text of the first
    returned document is handed back to the agent.
    """

    # `name` and `description` override fields declared on BaseTool. They are
    # annotated explicitly because pydantic v2-based LangChain releases reject
    # un-annotated overrides of inherited fields at class-creation time.
    name: str = "DuckDuckGoScraper"
    description: str = """
    If you found the website link in DuckDuckGo,
    Use this to get the content of the link for my research.
    It takes a URL as an argument.
    """
    args_schema: Type[DuckDuckGoScraperToolArgsSchema] = DuckDuckGoScraperToolArgsSchema

    def _run(self, url: str) -> str:
        """Load ``url`` and return the first document's text on a single line.

        Args:
            url: Address of the page to scrape.

        Returns:
            The page text with every run of whitespace (including newlines)
            collapsed to one space, or a short message when the loader
            returned no documents.
        """
        loader = WebBaseLoader(url)
        docs = loader.load()
        if not docs:
            # Avoid an IndexError on docs[0]; tell the agent nothing was found.
            return f"No content could be loaded from {url}"
        # Collapse whitespace to single spaces. Simply deleting "\n" would
        # glue together words that were separated only by a line break.
        return " ".join(docs[0].page_content.split())


class SaveToFileTool(BaseTool):
    """Agent tool that writes the given text to ``research_results.txt``.

    The file is created in (or overwritten in) the current working directory.
    """

    # `name` and `description` override fields declared on BaseTool. They are
    # annotated explicitly because pydantic v2-based LangChain releases reject
    # un-annotated overrides of inherited fields at class-creation time.
    name: str = "Save_to_File"
    description: str = """
    Use this tool to save the content as a .txt file.
    """
    args_schema: Type[SaveToFileToolArgsSchema] = SaveToFileToolArgsSchema

    def _run(self, content: str) -> Any:
        """Echo ``content`` to stdout and save it to ``research_results.txt``.

        Args:
            content: The text to persist.

        Returns:
            A confirmation message naming the output file.
        """
        # Echo the text so it is visible in the notebook output as well.
        print(content)
        # Write UTF-8 explicitly: the platform default encoding (e.g. cp1252
        # on Windows) can raise UnicodeEncodeError on non-ASCII research text.
        with open("research_results.txt", "w", encoding="utf-8") as file:
            file.write(content)
        return "Research results saved to research_results.txt"

In [103]:
# Instructions given to the agent as its system prompt. The text is kept
# verbatim (including its indentation) because it is sent to the model as-is.
RESEARCH_SYSTEM_PROMPT = """
        You are a research expert.

        Your task is to use Wikipedia or DuckDuckGo to gather comprehensive and accurate information about the query provided. 

        When you find a relevant website through DuckDuckGo, you must scrape the content from that website. Use this scraped content to thoroughly research and formulate a detailed answer to the question. 

        Combine information from Wikipedia, DuckDuckGo searches, and any relevant websites you find. Ensure that the final answer is well-organized and detailed, and include citations with links (URLs) for all sources used.

        Your research should be saved to a .txt file, and the content should match the detailed findings provided. Make sure to include all sources and relevant information.

        The information from Wikipedia must be included.

        Ensure that the final .txt file contains detailed information, all relevant sources, and citations.
        """

system_message = SystemMessage(content=RESEARCH_SYSTEM_PROMPT)

# Tools the agent may call: Wikipedia lookup, DuckDuckGo search, page
# scraping, and saving the final report to disk.
research_tools = [
    WikipediaSearchTool(),
    DuckDuckGoSearchTool(),
    DuckDuckGoScraperTool(),
    SaveToFileTool(),
]

# OpenAI-functions agent wired to the shared LLM, the tools above, and the
# research system prompt; verbose mode prints each tool invocation.
agent = initialize_agent(
    tools=research_tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={"system_message": system_message},
    handle_parsing_errors=True,
    verbose=True,
)

In [104]:
# Research question handed to the agent.
query = "Research about the XZ backdoor"
# `agent.run()` is deprecated in LangChain 0.1+. `invoke()` takes the input as
# a dict and returns a dict whose "output" entry holds the final answer
# string, so `results` has the same type and content as before.
results = agent.invoke({"input": query})["output"]
print(results)

  warn_deprecated(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearch` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024.

TypeError: DDGS.text() got an unexpected keyword argument 'max_results'