In [3]:
import nest_asyncio
nest_asyncio.apply()

import time
from typing import Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.tools import DuckDuckGoSearchResults
from langchain.retrievers import WikipediaRetriever
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

# Chat model shared by the agent below; a low temperature (0.1) is requested.
llm = ChatOpenAI(temperature=0.1)

# Argument schema shared by the search tools: a single free-text query.
# NOTE: documented with comments rather than a class docstring on purpose --
# a pydantic model's docstring becomes part of its generated schema.
class SearchToolArgsSchema(BaseModel):
    query: str = Field(
        description="The query you will search for.",
    )

class WikipediaSearchTool(BaseTool):
    """Agent tool that looks a query up on Wikipedia."""

    name = "WikipediaSearchTool"
    description = """
    Use this tool to research.
    It takes a query as an argument.
    """
    args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

    def _run(self, query):
        """Return the documents the Wikipedia retriever finds for ``query``.

        The retriever is configured with ``top_k_results=1``.
        """
        return WikipediaRetriever(top_k_results=1).get_relevant_documents(query)

class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that runs a DuckDuckGo search for a query."""

    name = "DuckDuckGoSearchTool"
    description = """
    Use this tool to find the ONLY ONE link to the most relevant result for my research.
    It takes a query as an argument.
    """
    args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

    def _run(self, query):
        """Run ``query`` through DuckDuckGo and return the search tool's output.

        The underlying API wrapper is configured with ``max_results=1``.
        """
        results_tool = DuckDuckGoSearchResults(
            api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=1),
        )
        return results_tool.run(query)

# Argument schema for LinkScrapeTool: the single URL whose content is fetched.
# NOTE: documented with comments rather than a class docstring on purpose --
# a pydantic model's docstring becomes part of its generated schema.
class LinkScrapeToolArgsSchema(BaseModel):
    link: str = Field(
        description="The ONE link you will extract its content. Example: 'https://www.wired.com/story/xz-backdoor-everything-you-need-to-know/'",
    )
    
class LinkScrapeTool(BaseTool):
    """Agent tool that downloads a web page and returns the start of its text."""

    name = "LinkScrapeTool"
    description = """
    If you found the website link in DuckDuckGo,
    Use this to get the content of the link for my research.
    You should enter ONLY ONE link.
    """
    args_schema: Type[LinkScrapeToolArgsSchema] = LinkScrapeToolArgsSchema

    def _run(self, link: str) -> str:
        """Load ``link`` and return the first text chunk of the page.

        The page is loaded with ``WebBaseLoader`` and split with a
        ``CharacterTextSplitter`` (``chunk_size=1000``, no overlap); only the
        first resulting chunk is returned.

        Args:
            link: URL of the page to fetch.

        Returns:
            The ``page_content`` of the first chunk, or a short explanatory
            message when the page produced no text at all.
        """
        loader = WebBaseLoader(link)
        data = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_documents(data)
        if not texts:
            # A page with no extractable text yields zero chunks; indexing
            # texts[0] would raise IndexError and abort the whole agent run.
            return f"No readable content could be extracted from {link}."
        return texts[0].page_content
    
# Argument schema for SaveTXTfileTool: the text that gets written to disk.
# NOTE: documented with comments rather than a class docstring on purpose --
# a pydantic model's docstring becomes part of its generated schema.
class SaveTXTfileToolArgsSchema(BaseModel):
    doc: str = Field(
        description="The content extracted from the link that you will save to a .txt file.",
    )

class SaveTXTfileTool(BaseTool):
    """Agent tool that writes extracted text to a timestamp-named .txt file."""

    name = "SaveTXTfileTool"
    description = """
        If you found the website link in DuckDuckGo and extracted the content,
        Use this to save the content to a .txt file for my research.
        You shoud enter a document that you extracted from the website.
    """
    args_schema: Type[SaveTXTfileToolArgsSchema] = SaveTXTfileToolArgsSchema

    def _run(self, doc: str) -> str:
        """Write ``doc`` to ``../files/<HHMMSS>.txt`` and report where it went.

        Args:
            doc: Text to save.

        Returns:
            A confirmation message containing the path of the written file,
            so the agent receives a meaningful observation instead of ``None``.

        Raises:
            OSError: If the directory cannot be created or the file cannot be
                written.
        """
        # Function-scope import keeps this block self-contained.
        import os

        file_path = f"../files/{time.strftime('%H%M%S')}.txt"
        # The target directory is not guaranteed to exist on a fresh checkout.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        # Scraped web text routinely contains non-ASCII characters; an explicit
        # encoding avoids UnicodeEncodeError under non-UTF-8 platform defaults.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(doc)
        return f"Saved the content to {file_path}"

# Build an OpenAI-functions agent that can use all four research tools.
agent = initialize_agent(
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        LinkScrapeTool(),
        SaveTXTfileTool(),
    ],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    verbose=True,  # print the intermediate tool calls while the agent runs
)

# Research request handed to the agent.
prompt = "Research about the XZ backdoor."

# Left as the final bare expression so a notebook displays the result dict.
agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3m[Document(page_content='On 29 March 2024, software developer Andres Freund reported that he had found a maliciously introduced backdoor in the Linux utility xz within the liblzma library in versions 5.6.0 and 5.6.1 released in February 2024.While xz is commonly present in most Linux distributions, the backdoor only targeted Debian- and RPM-based systems running on the x86-64 architecture. At the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions.The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been assigned a CVSS score of 10.0, the highest possible score.\n\n\n== Background ==\nMicrosoft employee and PostgreSQL developer Andres Freun

{'input': 'Research about the XZ backdoor.',
 'output': "The XZ backdoor refers to a maliciously introduced backdoor in the Linux utility xz within the liblzma library in versions 5.6.0 and 5.6.1 released in February 2024. This backdoor targeted Debian- and RPM-based systems running on the x86-64 architecture. The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been assigned a CVSS score of 10.0, the highest possible score.\n\nThe backdoor was reported by software developer Andres Freund after investigating a performance regression in Debian Sid. The attacker made efforts to obfuscate the code, and once the compromised version is incorporated into the operating system, it alters the behavior of OpenSSH's SSH server daemon, allowing unauthorized access to the entire system remotely.\n\nThe campaign to insert the backdoor into the XZ Utils project was a culmination of approximately three 