In [9]:
import os
import requests
from typing import Type
from bs4 import BeautifulSoup
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.prompts.chat import SystemMessagePromptTemplate

# Shared chat model for the agent; the low temperature keeps tool selection
# and summaries close to deterministic.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-1106",
    temperature=0.1,
)

# Argument schema for WikipediaSearchTool.
# Documented with comments rather than a docstring so the schema generated
# from this model stays exactly as before.
class WikipediaSearchToolArgs(BaseModel):
    # Required free-text search phrase.
    query: str = Field(
        ...,
        description="The query to search on Wikipedia.",
    )

class WikipediaSearchTool(BaseTool):
    """Look up a topic on English Wikipedia and return the top hit's summary."""

    name = "WikipediaSearchTool"
    description = "This tool searches Wikipedia for the given query and returns the summary of the first result. Use it to get a brief overview of the topic."
    args_schema: Type[WikipediaSearchToolArgs] = WikipediaSearchToolArgs

    def _run(self, query: str):
        """Search Wikipedia for *query* and return the first result's extract.

        Args:
            query: Free-text search phrase.

        Returns:
            The summary extract of the first search hit, "No summary found."
            when the summary endpoint has no extract for that page,
            "No results found on Wikipedia." when the search is empty, or a
            "Wikipedia lookup failed: ..." message when the request itself
            fails. Errors are returned as text (not raised) so the agent can
            read them and carry on with another tool.
        """
        # Local import: keeps this block self-contained without touching the
        # file-level import list.
        from urllib.parse import quote

        timeout_seconds = 10  # never let a stalled connection hang the agent

        try:
            # Let requests build the query string so characters such as
            # '&', '#' or '+' in the query are encoded instead of corrupting
            # the URL.
            search_response = requests.get(
                "https://en.wikipedia.org/w/api.php",
                params={
                    "action": "query",
                    "list": "search",
                    "srsearch": query,
                    "format": "json",
                },
                timeout=timeout_seconds,
            )
            search_response.raise_for_status()
            hits = search_response.json().get("query", {}).get("search", [])
            if not hits:
                return "No results found on Wikipedia."

            # The title becomes a single path segment: use underscores for
            # spaces and percent-encode everything else (including '/').
            title_segment = quote(hits[0]["title"].replace(" ", "_"), safe="")
            summary_response = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_segment}",
                timeout=timeout_seconds,
            )
            if not summary_response.ok:
                return "No summary found."
            return summary_response.json().get("extract", "No summary found.")
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers JSON decoding failures on older requests
            # versions, where they are not RequestException subclasses.
            return f"Wikipedia lookup failed: {exc}"

# Argument schema for DuckDuckGoSearchTool.
# Documented with comments rather than a docstring so the schema generated
# from this model stays exactly as before.
class DuckDuckGoSearchToolArgs(BaseModel):
    # Required free-text search phrase.
    query: str = Field(
        ...,
        description="The query to search on DuckDuckGo.",
    )

class DuckDuckGoSearchTool(BaseTool):
    """Run a DuckDuckGo web search and hand the results back as text."""

    name = "DuckDuckGoSearchTool"
    description = "This tool uses DuckDuckGo to search for the given query and returns the search results as text."
    args_schema: Type[DuckDuckGoSearchToolArgs] = DuckDuckGoSearchToolArgs

    def _run(self, query: str):
        """Return whatever a fresh DuckDuckGoSearchAPIWrapper yields for *query*."""
        # A new wrapper is created per call, exactly as before.
        return DuckDuckGoSearchAPIWrapper().run(query)

# Argument schema for WebScrapingTool.
# Documented with comments rather than a docstring so the schema generated
# from this model stays exactly as before.
class WebScrapingToolArgs(BaseModel):
    # Required address of the page to fetch.
    url: str = Field(
        ...,
        description="The URL of the website to scrape text from.",
    )

class WebScrapingTool(BaseTool):
    """Fetch a web page and reduce it to its non-empty text lines."""

    name = "WebScrapingTool"
    description = "This tool scrapes the HTML content from the given URL and extracts plain text."
    args_schema: Type[WebScrapingToolArgs] = WebScrapingToolArgs

    def _run(self, url: str):
        """Download *url* and return its text content, one stripped line per row.

        Args:
            url: Address of the page to fetch.

        Returns:
            The page text with blank lines removed and each line stripped, or
            a "Failed to fetch ..." message when the request times out, cannot
            connect, or comes back with an HTTP error status. Failures are
            returned as text (not raised) so the agent can react to them.
        """
        try:
            # Bounded wait: without a timeout a stalled server blocks forever.
            response = requests.get(url, timeout=15)
            response.raise_for_status()
        except requests.RequestException as exc:
            return f"Failed to fetch {url}: {exc}"

        soup = BeautifulSoup(response.text, 'html.parser')
        # get_text() would otherwise include inline JavaScript and CSS, which
        # is noise rather than page content.
        for tag in soup(["script", "style"]):
            tag.decompose()

        stripped_lines = (line.strip() for line in soup.get_text().splitlines())
        return "\n".join(line for line in stripped_lines if line)

# Argument schema for SaveToFileTool.
# Documented with comments rather than a docstring so the schema generated
# from this model stays exactly as before.
class SaveToFileToolArgs(BaseModel):
    # Required destination path for the output file.
    file_name: str = Field(
        ...,
        description="The name of the file to save the content, e.g., output.txt.",
    )
    # Required text that will become the file's entire contents.
    content: str = Field(
        ...,
        description="The text content to save in the file.",
    )

class SaveToFileTool(BaseTool):
    """Write text supplied by the agent to a file on disk."""

    name = "SaveToFileTool"
    description = "This tool saves the provided text content to a specified .txt file."
    args_schema: Type[SaveToFileToolArgs] = SaveToFileToolArgs

    def _run(self, file_name: str, content: str):
        """Overwrite *file_name* with *content* (UTF-8) and return a confirmation.

        Args:
            file_name: Path of the file to create or truncate.
            content: Text written as the file's entire contents.

        Returns:
            A sentence naming the file that was written.
        """
        # NOTE(review): file_name comes straight from the model and is used
        # as-is, so any writable path can be overwritten.
        with open(file_name, mode="w", encoding="utf-8") as output_file:
            output_file.write(content)
        confirmation = f"Research results have been saved to {file_name}."
        return confirmation

# Standing instruction for the agent: every research task must end with the
# findings written to disk through SaveToFileTool.
# Adjacent literals are concatenated by the parser, so the text is unchanged.
system_message = (
    "You are a research AI agent. "
    "When performing any research, you MUST always save the final research "
    "results to a .txt file using the SaveToFileTool. "
    "Do not skip the file creation step under any circumstances."
)

# The message contains no "{placeholders}", so the template renders verbatim.
system_prompt = SystemMessagePromptTemplate.from_template(system_message)

# Tools exposed to the model, in the same order as before.
research_tools = [
    WikipediaSearchTool(),
    DuckDuckGoSearchTool(),
    WebScrapingTool(),
    SaveToFileTool(),
]

# OpenAI-functions agent wired to the shared LLM, the tools above, and the
# custom system prompt; verbose output prints each tool invocation.
agent = initialize_agent(
    tools=research_tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={"system_message": system_prompt},
    handle_parsing_errors=True,
    verbose=True,
)

# Run one research request and print the agent's result.
query = "Research about the XZ backdoor"
result = agent.invoke(query)
print(result)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mIn February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.[0m[32;1m[1;3m
Invoking: `SaveToFileTool` with `{'file_name': 'XZ_Backdoor_Research.txt', 'content': 'In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remo