In [73]:
import streamlit as st
import os
import re
import requests
from langchain.schema import SystemMessage
from typing import Type
from langchain.chat_models import ChatOpenAI
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.tools import BaseTool, DuckDuckGoSearchResults, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper


llm = ChatOpenAI(temperature=0.1)

class SymbolSearchToolArgsSchema(BaseModel):
    query: str = Field(
        description="The query you will search for. Example query: Research about the malware"
    )


class DuckduckgoSearchTool(BaseTool):
    name = "StockMarketSymbolSearchTool"
    description = """
    Use this tool to find research about specific areas of the computer industry.
    It takes a query as an argument.
    """
    args_schema: Type[SymbolSearchToolArgsSchema] = (
        SymbolSearchToolArgsSchema
    )

    def _run(self, query):
        ddg = DuckDuckGoSearchResults()
        return ddg.run(query)

class WikipediaSearchTool(BaseTool):
    name = "WikipediaSearchTool"
    description = """
    Use this tool to find research about specific areas of the computer industry.
    It takes a query as an argument.
    """
    args_schema: Type[SymbolSearchToolArgsSchema] = (
        SymbolSearchToolArgsSchema
    )
    def _run(self, query):
        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return wiki.run(query)


class CompanyOverviewArgsSchema(BaseModel):
    symbol: str = Field(description="Stock symbol of the company. Example: AAPL, TSLA")


class CompanyOverviewTool(BaseTool):
    name = "CompanyOverview"
    description = """
    Use this to get an overview of the financials of the company.
    You should enter a stock symbol.
    """
    args_schema: Type[CompanyOverviewArgsSchema] = CompanyOverviewArgsSchema

    def _run(self, symbol):
        r = requests.get(
            f"https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={alpha_vantage_api_key}"
        )
        return r.json()
    

agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    tools=[
        DuckduckgoSearchTool(),
        # WikipediaSearchTool(),
    ],
)

result = agent.invoke("Research about the XZ backdoor")

if result:
  file_path = f"./stored/{result['input']}.txt"

  with open(file_path, "w") as f:
    f.write(result['output'])
    links = re.findall(r'(\(.*\))', result['output'])
    for i in range(len(links)):
      links[i] = links[i][1: -2]
    



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `StockMarketSymbolSearchTool` with `{'query': 'XZ backdoor research'}`


[0m[36;1m[1;3m[snippet: What does the backdoor do? Malicious code added to xz Utils versions 5.6.0 and 5.6.1 modified the way the software functions. The backdoor manipulated sshd, the executable file used to make remote ..., title: What we know about the xz Utils backdoor that almost infected the world ..., link: https://arstechnica.com/security/2024/04/what-we-know-about-the-xz-utils-backdoor-that-almost-infected-the-world/], [snippet: That operation matches the style of the XZ Utils backdoor far more than the cruder supply chain attacks of APT41 or Lazarus, by comparison. "It could very well be someone else," says Aitel., title: The Mystery of 'Jia Tan,' the XZ Backdoor Mastermind | WIRED, link: https://www.wired.com/story/jia-tan-xz-backdoor/], [snippet: Timeline of the xz open source attack. Posted on Monday, April 1, 2024. Updated Wed

In [74]:
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
import nest_asyncio

html2text_transformer = Html2TextTransformer()
nest_asyncio.apply()
loader = AsyncChromiumLoader(links)
docs = loader.load()
transformed = html2text_transformer.transform_documents(docs)
file_path = f"./stored/link about{result['input']}.txt"

with open(file_path, "w") as f:
  for i in transformed:
    f.write(i.page_content)
