In [4]:
from typing import Any, Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType 
from langchain.tools import WikipediaQueryRun
from langchain.tools import DuckDuckGoSearchResults
from langchain.utilities import WikipediaAPIWrapper
from langchain.document_loaders import WebBaseLoader
from IPython.core.interactiveshell import InteractiveShell
import datetime
import os

# Set IPython's `ast_node_interactivity` option to "all" for this session.
InteractiveShell.ast_node_interactivity = "all"

# Chat model handed to the agent further down (gpt-4-turbo, temperature 0.1).
llm = ChatOpenAI(
    model_name="gpt-4-turbo",
    temperature=0.1,
)

class WikipediaSearchToolArgsSchema(BaseModel):
    # Argument schema with a single string field, `query`. It is used as the
    # `args_schema` of both WikipediaSearchTool and DuckDuckGoSearchTool.
    # Documented with `#` comments rather than a docstring so that the
    # schema generated from this model is left exactly as it was.
    query: str = Field(
        description=(
            "The query to research. Example: Research about the XZ backdoor"
        ),
    )

class WikipediaSearchTool(BaseTool):
    """Agent tool that passes a query string to ``WikipediaQueryRun``."""

    name = "WikipediaSearchTool"
    description = "Use this to search Wikipedia content for given terms."
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query):
        """Run ``query`` through a newly built Wikipedia runner.

        A fresh ``WikipediaAPIWrapper`` / ``WikipediaQueryRun`` pair is
        created on every call; whatever ``run`` returns is passed back
        unchanged.
        """
        api_wrapper = WikipediaAPIWrapper()
        runner = WikipediaQueryRun(api_wrapper=api_wrapper)
        outcome = runner.run(query)
        return outcome

class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that passes a query string to ``DuckDuckGoSearchResults``.

    It reuses ``WikipediaSearchToolArgsSchema`` as its argument schema, so
    the only argument is a ``query`` string.
    """

    name = "DuckDuckGoSearchTool"
    description = "Use this to return URLs of searched web pages from given terms."
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query):
        """Return the output of a fresh ``DuckDuckGoSearchResults`` run on ``query``."""
        return DuckDuckGoSearchResults().run(query)

class WebContentExtractorToolArgsSchema(BaseModel):
    # Argument schema for WebContentExtractorTool: one `urls` field typed as
    # a plain `list` (the element type is not constrained here).
    # Documented with `#` comments rather than a docstring so that the
    # schema generated from this model is left exactly as it was.
    urls: list = Field(description="The urls to extract. urls are list of url.")

class WebContentExtractorTool(BaseTool):
    """Agent tool that loads the given URLs with ``WebBaseLoader``."""

    name = "WebContentExtractorTool"
    description = "Extracts web content using WebBaseLoader."
    args_schema: Type[WebContentExtractorToolArgsSchema] = WebContentExtractorToolArgsSchema

    def _run(self, urls):
        """Build a ``WebBaseLoader`` for ``urls`` and return what ``load()`` yields."""
        return WebBaseLoader(urls).load()

class SaveToFileToolArgsSchema(BaseModel):
    # Argument schema for SaveToFileTool. The other tools in this file all
    # declare an explicit schema; without one, the `content` argument would
    # be inferred from an untyped parameter and carry no type or description.
    # Documented with `#` comments rather than a docstring so nothing extra
    # ends up in the generated schema.
    content: str = Field(
        description="The complete research text to write to the file."
    )

class SaveToFileTool(BaseTool):
    """Agent tool that writes text to a timestamped file under ``research/``."""

    name = "SaveToFileTool"
    description = "Saves research results to a text file."
    args_schema: Type[SaveToFileToolArgsSchema] = SaveToFileToolArgsSchema

    def _run(self, content):
        """Write ``content`` to ``research/research_<timestamp>.txt``.

        Args:
            content: Text to store; it is written as UTF-8.

        Returns:
            A message of the form ``"Research saved to <filename>"``.

        Raises:
            OSError: If the directory or the file cannot be created/written.
        """
        # exist_ok=True replaces the earlier "check, then create" sequence,
        # which could fail if the directory appeared between the two calls.
        os.makedirs('research', exist_ok=True)

        # The local-time timestamp has one-second resolution.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"research/research_{timestamp}.txt"

        # Save content to file
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(content)

        return f"Research saved to {filename}"

# Build the agent: an OPENAI_FUNCTIONS-type agent driven by `llm`, given one
# instance of each tool defined above, with verbose output and
# handle_parsing_errors switched on.
agent = initialize_agent(
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        WebContentExtractorTool(),
        SaveToFileTool(),
    ],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    handle_parsing_errors=True,
)

# Task description given to the agent: a headline followed by five numbered
# steps, joined with newlines (no trailing newline).
prompt = "\n".join(
    (
        "Research about the XZ backdoor. Follow these steps:",
        "1. Search Wikipedia for information",
        "2. Search DuckDuckGo for additional sources",
        "3. Extract content from relevant websites",
        "4. Summarize all findings with reference URLs",
        "5. Save the complete research to a text file",
    )
)

# Run the agent on the prompt; the return value is kept in `results`.
results = agent.invoke(prompt)
print("\nResearch completed successfully!")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2