In [10]:
from langchain_core.tools import Tool
from langchain.agents import initialize_agent
from langchain.llms import OpenAI
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
import requests
from bs4 import BeautifulSoup


# Define a custom web scraper tool
def scrape_website(url, timeout=10):
    """Download a web page and return its text with the HTML markup removed.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host, which would stall the whole agent run.

    Returns:
        The text of the parsed document as a single string. HTTP error
        statuses are not raised here, so the text of an error page is
        returned as-is.

    Raises:
        requests.RequestException: If the request cannot be completed or
            the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.get_text()


# Wrap scrape_website in a Tool so it can be included in the agent's tool list.
web_scraper_tool = Tool(
    description="Extract the main content of a website using its URL.",
    func=scrape_website,
    name="Web Scraper",
)

# Wikipedia lookup tool, backed by the run method of a new
# WikipediaAPIWrapper instance.
wikipedia_tool = Tool(
    description="Search in Wikipedia to extract information about the topic.",
    func=WikipediaAPIWrapper().run,
    name="Wikipedia Search",
)


def _resolve_duckduckgo_href(href):
    """Return the destination URL for a link on the results page, or None.

    Accepts direct ``http(s)`` links as well as DuckDuckGo redirect links of
    the form ``//duckduckgo.com/l/?uddg=<percent-encoded target>``, from which
    the real target is decoded.
    """
    from urllib.parse import parse_qs, urlparse

    # Redirect links are commonly protocol-relative; give them a scheme so
    # they can be parsed like any other absolute URL.
    if href.startswith("//"):
        href = "https:" + href
    if not href.startswith("http"):
        return None

    parsed = urlparse(href)
    host = parsed.hostname or ""
    is_duckduckgo = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
    if is_duckduckgo and parsed.path.rstrip("/") == "/l":
        # parse_qs already percent-decodes the embedded target URL.
        targets = parse_qs(parsed.query).get("uddg")
        if targets and targets[0].startswith("http"):
            return targets[0]
        return None
    return href


def duckduckgo_search_and_scrape(query):
    """Search DuckDuckGo and return the text of the first linked page.

    Args:
        query: Free-text search string. It is sent via ``params`` so that
            characters such as ``&``, ``#``, ``+`` or spaces are URL-encoded
            instead of corrupting the request URL.

    Returns:
        ``"DuckDuckGo result from <url>:\\n<page text>"`` for the first usable
        link on the results page, or ``"No results found on DuckDuckGo."``
        when the page contains none.

    Raises:
        requests.HTTPError: If the search request returns an error status.
        requests.RequestException: If a request fails or times out.
    """
    response = requests.get(
        "https://html.duckduckgo.com/html/",
        params={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,  # never hang the agent on an unresponsive search endpoint
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Lazily resolve each anchor and stop at the first one that yields a URL.
    candidates = (
        _resolve_duckduckgo_href(anchor["href"])
        for anchor in soup.find_all("a", href=True)
    )
    first_result_url = next((url for url in candidates if url), None)

    if first_result_url is None:
        return "No results found on DuckDuckGo."

    website_content = scrape_website(first_result_url)
    return f"DuckDuckGo result from {first_result_url}:\n{website_content}"


# Wrap duckduckgo_search_and_scrape in a Tool for the agent's tool list.
duckduckgo_tool = Tool(
    description="Search DuckDuckGo, find a website, and extract its content.",
    func=duckduckgo_search_and_scrape,
    name="DuckDuckGo Search",
)

# Initialize the agent
# OpenAI() is built with no arguments, so the model and credentials are not set
# here — presumably they come from library defaults / the environment; confirm
# before running.
llm = OpenAI()
# The three Tool objects defined earlier in this cell.
tools = [wikipedia_tool, duckduckgo_tool, web_scraper_tool]
# The agent type is selected by its string name; verbose=False is passed through
# to initialize_agent.
# NOTE(review): initialize_agent and agent.run appear to be deprecated in newer
# LangChain releases — verify against the installed version.
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=False)

# Research query
query = "Research about the XZ backdoor"
# This call runs the agent on the query; any exception raised inside a tool is
# not caught here.
result = agent.run(query)

# Save the research to a file
# Mode "w" overwrites any existing file of the same name; the result is written
# as UTF-8 text (assumes agent.run returned a str — TODO confirm).
with open("XZ_backdoor_research.txt", "w", encoding="utf-8") as file:
    file.write(result)

print("Research completed and saved to 'XZ_backdoor_research.txt'.")

Research completed and saved to 'XZ_backdoor_research.txt'.
