In [35]:
from typing import Type
import os
import requests
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
from langchain_community.utilities import WikipediaAPIWrapper ,DuckDuckGoSearchAPIWrapper
from langchain_community.document_loaders import WebBaseLoader
from langchain.schema import Document
from bs4 import BeautifulSoup

llm = ChatOpenAI(
    temperature=0.1,
)

def save_the_file(docs,name):
    f = open(f"./search_file/{name}.txt","w",encoding="utf-8")
    f.write(docs)

class WikipediaSearchToolArgsSchema(BaseModel):
    query: str = Field(
        description="A query to find a URL in Wikipedia. Example: Research about Apple."
    )

class WikipediaSearchTool(BaseTool):
    name = "WikipediaSearchTool"
    description = """
    Take the query as an argument and collect the link information of the search results.
    Returns the most useful or first URL found.
    """
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query):
        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        wiki_result = wiki.run(query)
        save_the_file(wiki_result,"wiki")
        return wiki_result

class DuckDuckGoSearchToolArgsSchema(BaseModel):
    query: str = Field(
        description="A query to find a URL in DuckDuckGo. Example: Research about Apple."
    )

class DuckDuckGoSearchTool(BaseTool):
    name = "DuckDuckGoSearchTool"
    description = """
    Take the query as an argument and collect the link information of the search results.
    Returns the most useful or first URL found.
    """
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query):
        ddg = DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper())
        ddg_result = ddg.run(query)
        save_the_file(ddg_result, "ddg")
        return ddg_result

class WebScrapeToolArgsSchema(BaseModel):
    url: str = Field(
        description="Site url found on Wikipedia and DuckDuckGo. Example: https://www.example.com"
    )

class WebScrapeTool(BaseTool):
    name = "WebScrapeTool"
    description = """
    Load text from the URL found in Wikipedia and DuckDuckGo and turn it into a document.
    Provides the relevant URL at the end of the document.
    Example: https://www.example.com/article
    """
    args_schema: Type[WebScrapeToolArgsSchema] = WebScrapeToolArgsSchema

    def _run(self, url):
        docs = WebBaseLoader(url)
        return docs.load()
    
class SaveFileToolArgsSchema(BaseModel):
    doc: str

class SaveFileTool(BaseTool):
    name = "SaveFileTool"
    description = "Saves the content into a file."
    args_schema: Type[SaveFileToolArgsSchema] = SaveFileToolArgsSchema

    def _run(self, doc):
        file_name = "result.txt"
        with open(f'./files/{file_name}', 'w', encoding='utf-8') as f:
            f.write(doc)
        return f"Content saved to {file_name}"


tools = [
    WikipediaSearchTool(),
    DuckDuckGoSearchTool(),
    WebScrapeTool(),
    SaveFileTool(),
]

agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    tools=tools,
)

query = "Research about the XZ backdoor"
agent.invoke(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should start by searching for information about the XZ backdoor to gather relevant details.
Action: WikipediaSearchTool
Action Input: XZ backdoor[0m
Observation: [36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux utility xz within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by 

{'input': 'Research about the XZ backdoor',
 'output': 'The XZ backdoor is a malicious backdoor introduced to the Linux utility xz within the liblzma library in versions 5.6.0 and 5.6.1, allowing an attacker remote code execution capabilities on the affected Linux system.'}