In [10]:
from typing import Any, Type
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool, DuckDuckGoSearchRun
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper
from langchain.document_loaders import WebBaseLoader
from langchain.schema import SystemMessage
from bs4 import BeautifulSoup

#----------- Tools ------------#

class DuckDuckGoSearchToolArgsSchema(BaseModel):
  # Argument schema for DuckDuckGoSearchTool: a single required string field.
  query: str = Field(
    ...,  # explicit Ellipsis: the field is required (no default value)
    description="The query you will search for",
  )

class DuckDuckGoSearchTool(BaseTool):
  # Agent tool that forwards a free-text query to DuckDuckGoSearchRun.
  name = "DuckDuckGoSearchTool"
  description = """
  Use this tool when you need to search using the DuckDuckGo search engine.
  """
  args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

  def _run(self, query):
    """Run ``query`` through a newly created DuckDuckGoSearchRun and return its result."""
    # A new search runner is built on every call; no state is kept on the tool.
    return DuckDuckGoSearchRun().run(query)
   
class WikipediaSearchToolArgsSchema(BaseModel):
  # Argument schema for WikipediaSearchTool: a single required string field.
  query: str = Field(
    ...,  # explicit Ellipsis: the field is required (no default value)
    description="The query you will search for on Wikipedia.",
  )

class WikipediaSearchTool(BaseTool):
  # Agent tool that forwards a query to WikipediaAPIWrapper.
  name = "WikipediaSearchTool"
  description = """
  Use this tool if you need to find content on Wikipedia related to a topic.
  It takes a query as an argument.
  Example query: RAG
  """
  args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

  def _run(self, query):
    """Run ``query`` through a newly created WikipediaAPIWrapper and return its result."""
    # Open question from the author: should WikipediaQueryRun be used here instead?
    return WikipediaAPIWrapper().run(query)

class SiteScrapingToolArgsSchema(BaseModel):
  # Argument schema for SiteScrapingTool: a single required string field.
  url: str = Field(
    ...,  # explicit Ellipsis: the field is required (no default value)
    description="The URL of the website you want to scrape",
  )

class SiteScrapingTool(BaseTool):
  # Agent tool that loads a URL with WebBaseLoader and returns the text of
  # the first loaded document.
  name = "SiteScrapingTool"
  description = """
    If you found the website link in DuckDuckGo or Wikipedia
    Use this to scrape the content of the link for my research.
  """
  args_schema: Type[SiteScrapingToolArgsSchema] = SiteScrapingToolArgsSchema

  def _run(self, url):
    """Load ``url`` and return the ``page_content`` of the first document.

    The loaded text is also echoed to stdout with a ``loaded==`` prefix.
    """
    loader = WebBaseLoader(url)
    # Only the first document returned by the loader is used.
    text = loader.load()[0].page_content
    print("loaded==", text)
    return text

class SaveToTXTToolArgsSchema(BaseModel):
  # Argument schema for SaveToTXTTool: two required string fields.
  # NOTE(review): the `url` description matches the scraping schema's wording,
  # and SaveToTXTTool._run does not read `url` -- confirm whether it is needed.
  url: str = Field(
    ...,  # explicit Ellipsis: the field is required (no default value)
    description="The URL of the website you want to scrape",
  )
  scraped_data: str = Field(
    ...,
    description="String type data you want to save in a file",
  )

class SaveToTXTTool(BaseTool):
  # Agent tool that writes the research text to ``search_result.txt`` in the
  # current working directory, overwriting any previous content.
  name = "SaveToTXTTool"
  description = """
    A tool that saves the results as a txt file when the agent completes all researchs.
  """
  args_schema: Type[SaveToTXTToolArgsSchema] = SaveToTXTToolArgsSchema

  def _run(self, url, scraped_data):
    """Write ``scraped_data`` to ``search_result.txt`` and report the outcome.

    Args:
      url: Accepted because the argument schema declares it; not used here.
      scraped_data: Text to store in the file.

    Returns:
      A short status message, so the agent receives a meaningful observation
      rather than ``None``.
    """
    # Explicit UTF-8: scraped web text often contains characters that the
    # platform's default codec cannot encode, which would raise
    # UnicodeEncodeError in the middle of the write.
    with open('search_result.txt', 'w', encoding='utf-8') as file:
      num = file.write(scraped_data)

    if num != 0:
      print("scrapted File created.")
      return f"Saved {num} characters to search_result.txt."

    # The file was still created (or truncated) by open(); it is just empty.
    print("No scrapted File.")
    return "search_result.txt is empty: scraped_data contained no text."

#----------- agent ------------#

# Chat model used by the agent (gpt-4o-mini, temperature 0.1).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1)

# OpenAI-functions agent with the four tools listed below and a custom
# system prompt describing the research workflow.
agent = initialize_agent(
    tools=[
      DuckDuckGoSearchTool(),
      WikipediaSearchTool(),
      SiteScrapingTool(),
      SaveToTXTTool(),
    ],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    agent_kwargs={
        "system_message": SystemMessage(
            content="""
           You are a meticulous researcher. When doing research, you have a habit of first doing a web search using DuckDuckGo and consulting Wikipedia and then. Among the information found in multiple places, overlapping information is shown only once and information that is not known is not made up. If there are links related to the search term found on Wikipedia and DuckDuckGo, scrape them. For scraped sites, the relevant link is attached at the end of the material.Once you have finished researching a given topic, organize everything you researched into a txt file.
        """
        )
    },
    handle_parsing_errors=True,
    verbose=True,
)
# Start one research run; as the last expression of the cell, its result is
# what the notebook displays.
agent.invoke("Research about the XZ backdoor.")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGoSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mThe XZ Backdoor: Everything You Need to Know Details are starting to emerge about a stunning supply chain attack that sent the open source software community reeling. The Mystery of 'Jia Tan,' the XZ Backdoor Mastermind. The thwarted XZ Utils supply chain attack was years in the making. Now, clues suggest nation-state hackers were behind the persona that ... A backdoor was planted in the XZ utils package, a popular open-source compression utility for Linux/UNIX systems, by a threat actor named JiaT75. The backdoor exploited the SSH protocol to gain remote access and control over affected devices. A malicious code inserted into a Linux compression tool by a developer named JiaT75 could have compromised millions of systems via SSH. A Microsoft worker discovered the backdoor and alerted the open source community, preventing a major disaster. The

{'input': 'Research about the XZ backdoor.',
 'output': 'I have completed the research on the XZ backdoor. Here is a summary of the findings:\n\n# XZ Backdoor Research\n\n## Overview\nThe XZ backdoor refers to a malicious backdoor introduced into the XZ Utils, a popular open-source compression utility for Linux/UNIX systems. The backdoor was discovered in February 2024 and is associated with the Common Vulnerabilities and Exposures (CVE) number CVE-2024-3094, receiving a CVSS score of 10.0, the highest possible score.\n\n## Background\n- **Discovery**: The backdoor was discovered by Andres Freund, a Microsoft employee and PostgreSQL developer, on March 29, 2024, while investigating performance issues in Debian Sid.\n- **Perpetrator**: The backdoor was introduced by an account using the name "Jia Tan". It is suspected that this name, along with others associated with the campaign, are pseudonyms.\n- **Timeline**: The campaign to insert the backdoor spanned approximately three years, fro