In [12]:
from bs4 import BeautifulSoup
from httpx import HTTPError
from langchain.agents import create_sql_agent, AgentType, initialize_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper
from pydantic import BaseModel, Field
from typing import Type
from fake_useragent import UserAgent
import os

import requests

# Chat model handed to the agent below; temperature is fixed at 0.1.
llm = ChatOpenAI(temperature=0.1)


# search schema
class SearchToolArgsSchema(BaseModel):
	# Argument schema for the search tools in this file: it is assigned as
	# `args_schema` on both DuckDuckGoSearchTool and WikipediaSearchTool.
	#
	# query: the search string; it arrives as the `query` parameter of the
	# tool's `_run` method. The Field description (Korean) reads roughly:
	# "The query you use when searching. Example query: Stock Market Symbol
	# for Apple Company".
	query: str = Field(
		description="당신이 검색할 때 사용하는 쿼리입니다. 쿼리 예시: Stock Market Symbol for Apple Company"
	)


# duck duck go search
class DuckDuckGoSearchTool(BaseTool):
	# Agent tool that looks up a query on DuckDuckGo and hands back the
	# wrapper's result, or a failure message if the search raised.
	name = "DuckDuckGoSearchTool"
	description = """
		이 도구를 사용해서 원하는 query 를 DuckDuckGo 에서 검색합니다.
		이 도구는 인자로 query 를 사용합니다.
	"""

	args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

	def _safe_search(self, query):
		"""Run `query` through a freshly built DuckDuckGoSearchAPIWrapper.

		The wrapper is configured with safesearch="moderate", time="y" and
		max_results=5; whatever its `run` method returns is passed through.
		"""
		search_options = {
			"safesearch": "moderate",
			"time": "y",
			"max_results": 5,
		}
		return DuckDuckGoSearchAPIWrapper(**search_options).run(query)

	def _run(self, query):
		"""Search DuckDuckGo for `query`.

		Returns the search result on success. Any exception is converted
		into a failure-message string instead of propagating, so the agent
		can carry on with its other tools.
		"""
		try:
			outcome = self._safe_search(query)
		except HTTPError as err:
			outcome = f"DuckDuckGo 검색 실패(HTTPError: {err})."
		except Exception as err:
			# Deliberate catch-all at the tool boundary (best-effort search).
			outcome = f"DuckDuckGo 검색 실패(Unexpected: {err})."
		return outcome


# wikipedia search
class WikipediaSearchTool(BaseTool):
	# Agent tool that looks up a query on Wikipedia via WikipediaAPIWrapper
	# and returns the wrapper's result, or a failure message if it raised.
	name = "WikipediaSearchTool"
	description = """
		이 도구를 사용해서 원하는 query 를 Wikipedia 에서 검색합니다.
		이 도구는 인자로 query 를 사용합니다.
	"""

	args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

	def _run(self, query):
		"""Search Wikipedia for `query`.

		Returns whatever `WikipediaAPIWrapper().run(query)` returns. If the
		lookup raises, a failure-message string is returned instead of
		letting the exception abort the whole agent run, so a failed search
		is reported to the agent as an observation.
		"""
		try:
			return WikipediaAPIWrapper().run(query)
		except Exception as e:
			# Deliberate catch-all at the tool boundary: a search failure
			# should be an observation the agent can ignore, not a crash.
			return f"Wikipedia 검색 실패(Unexpected: {e})."


# text scrap schema
class textScrapSchema(BaseModel):
	# Argument schema for the page-scraping tool (assigned as `args_schema`
	# on textScrapTool).
	#
	# url: the address to fetch; it arrives as the `url` parameter of the
	# tool's `_run` method. The Field description (Korean) reads roughly:
	# "Receives the address you need to look up", followed by an example
	# Korean-Wikipedia URL.
	url: str = Field(
		description="당신이 검색해야 하는 주소를 입력받습니다. 예시: 'https://ko.wikipedia.org/wiki/%ED%8F%AC%ED%95%AD_%EB%B6%81%EC%86%A1%EB%A6%AC_%EB%B6%81%EC%B2%9C%EC%88%98'"
	)

# text scrap class
class textScrapTool(BaseTool):
	# Agent tool that downloads a web page and returns the text of its
	# <p> elements, or a failure message if the page could not be fetched.
	name = "textScrapTool"
	description = """
		이 도구를 사용해서 원하는 url 을 방문 후, 컨텐츠를 스크랩합니다.
		이 도구는 인자로 url 을 사용합니다.
	"""

	args_schema: Type[textScrapSchema] = textScrapSchema

	def _run(self, url):
		"""Fetch `url` and return the text of all its <p> elements.

		The request is sent with a random User-Agent header. Paragraph texts
		are joined with blank lines. If the request fails (connection error,
		timeout, or a 4xx/5xx status), a failure-message string is returned
		instead of raising, so the agent receives it as an observation.
		"""
		timeout_seconds = 10  # without a timeout, requests may block forever
		headers = {'User-Agent': UserAgent().random}

		try:
			r = requests.get(url, headers=headers, timeout=timeout_seconds)
			# Treat HTTP error statuses as failures rather than scraping
			# the body of an error page as if it were real content.
			r.raise_for_status()
		except requests.RequestException as e:
			return f"스크랩 실패(RequestException: {e})."

		soup = BeautifulSoup(r.text, 'html.parser')
		return "\n\n".join(p.get_text() for p in soup.select('p'))


# text file save schema
class textFileSaveSchema(BaseModel):
	# Argument schema for the file-saving tool (assigned as `args_schema`
	# on textFileSaveTool).
	#
	# text: the content to write; it arrives as the `text` parameter of the
	# tool's `_run` method. The Field description (Korean) reads roughly:
	# "Receives the text you need to save to a file".
	text: str = Field(
		description="당신이 파일로 저장해야 하는 텍스트를 입력받습니다."
	)

# text file save class
class textFileSaveTool(BaseTool):
	# Agent tool that writes the given text to ../files/summary.txt,
	# replacing any previous content of that file.
	name = "textFileSaveTool"
	description = """
		이 도구를 사용해서, 입력받은 텍스트를 파일로 저장합니다.
		이 도구는 인자로 text 를 사용합니다.
	"""

	args_schema: Type[textFileSaveSchema] = textFileSaveSchema

	def _run(self, text):
		"""Write `text` to ../files/summary.txt and report the outcome.

		The parent directory is created if it does not exist, and the file
		is written as UTF-8 so non-ASCII (e.g. Korean) text does not depend
		on the platform's default encoding. Returns a confirmation string on
		success, or a failure-message string if the write raised OSError.
		"""
		path = "../files/summary.txt"
		try:
			os.makedirs(os.path.dirname(path), exist_ok=True)
			# Plain "w" is enough: the file is only written, never read back.
			with open(path, "w", encoding="utf-8") as f:
				f.write(text)
		except OSError as e:
			return f"파일 저장 실패(OSError: {e})."
		return f"파일 저장 완료: {path}"


# agent setting
# Build an OpenAI-functions agent around the shared `llm`, exposing the four
# custom tools defined above (two search tools, the scraper, the file saver).
agent = initialize_agent(
	tools=[
		WikipediaSearchTool(),
		DuckDuckGoSearchTool(),
		textScrapTool(),
		textFileSaveTool(),
	],
	llm=llm,
	agent=AgentType.OPENAI_FUNCTIONS,
	handle_parsing_errors=True,
	verbose=True,
)

# Task for the agent (Korean): search 'XZ backdoor' on Wikipedia and
# DuckDuckGo, ignore search errors, scrape valid results, save them as txt.
prompt = "'XZ backdoor' 에 대해서 Wikipedia 와 DuckDuckGo 에서 검색해 줘. 검색 중 에러가 발생했다면 무시해줘. 유효한 검색 결과를 찾았다면, 내용을 스크랩 해줘. 그리고 스크랩 한 정보는 txt 파일 형태로 저장해줘."
agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2

{'input': "'XZ backdoor' 에 대해서 Wikipedia 와 DuckDuckGo 에서 검색해 줘. 검색 중 에러가 발생했다면 무시해줘. 유효한 검색 결과를 찾았다면, 내용을 스크랩 해줘. 그리고 스크랩 한 정보는 txt 파일 형태로 저장해줘.",
 'output': '검색 결과를 스크랩하여 txt 파일로 저장했습니다. 해당 정보는 다음과 같습니다:\n\n"In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.\nWhile xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who anno