In [1]:
from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage
from langchain.agents import initialize_agent, AgentType
from langchain.tools import BaseTool
from typing import Type, List
from pydantic import BaseModel, Field

llm = ChatOpenAI(
    # model_name="gpt-3.5-turbo-1106",  # 필요하면 4o 등으로 교체
    temperature=0.2,
)


In [None]:
import wikipedia

class WikiArgs(BaseModel):
    query: str = Field(description="검색어 (예: 'xz backdoor')")

class WikiSearchTool(BaseTool):
    name = "WikipediaSearch"
    description = "위키피디아에서 주어진 주제로 요약 정보를 검색합니다."
    args_schema: Type[WikiArgs] = WikiArgs

    def _run(self, query: str):
        wikipedia.set_lang("en")
        try:
            page = wikipedia.page(query, auto_suggest=False)
        except wikipedia.exceptions.DisambiguationError as e:
            # 가장 첫 항목으로 진행
            page = wikipedia.page(e.options[0])
        return {"title": page.title, "summary": page.summary, "url": page.url}


In [3]:
from duckduckgo_search import DDGS

class DDGArgs(BaseModel):
    query: str = Field(description="검색어 (예: 'xz backdoor exploit')")

class DuckDuckGoSearchTool(BaseTool):
    name: str = "DuckDuckGoSearch"
    description: str = "DuckDuckGo에서 주어진 쿼리로 웹 검색 결과 상위 5개를 반환합니다."
    args_schema: Type[DDGArgs] = DDGArgs

    def _run(self, query: str):
        with DDGS() as ddgs:
            results = [r for r in ddgs.text(query, max_results=5)]
        # 결과 객체가 dict 리스트이므로 그대로 반환
        return results


In [5]:
import requests, re
from bs4 import BeautifulSoup

class ScrapeArgs(BaseModel):
    url: str = Field(description="스크랩할 웹페이지 URL")

class WebScrapeTool(BaseTool):
    name: str = "WebScrape"
    description: str = "주어진 URL에서 본문 텍스트(HTML 태그 제거)를 추출합니다."
    args_schema: Type[ScrapeArgs] = ScrapeArgs

    def _run(self, url: str):
        res = requests.get(url, timeout=20)
        soup = BeautifulSoup(res.text, "html.parser")
        # 스크립트·스타일 제거
        for tag in soup(["script", "style", "noscript"]):
            tag.extract()
        text = soup.get_text(separator="\n")
        # 긴 공백 정리
        text = re.sub(r"\n{2,}", "\n", text)
        return text.strip()


In [7]:
from datetime import datetime
import pathlib, textwrap

class SaveArgs(BaseModel):
    content: str = Field(description="파일로 저장할 텍스트")

class SaveToFileTool(BaseTool):
    name: str = "SaveResearchFile"
    description: str = "조사 결과를 timestamp가 포함된 .txt 파일로 저장합니다."
    args_schema: Type[SaveArgs] = SaveArgs

    def _run(self, content: str):
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        fname = f"research_{ts}.txt"
        path = pathlib.Path(fname).resolve()
        with open(path, "w", encoding="utf-8") as f:
            f.write(textwrap.dedent(content))
        return str(path)


In [9]:
# WikiSearchTool가 정의되어 있지 않으므로 주석 처리하거나 제거합니다.
# 필요하다면 DuckDuckGoSearchTool도 정의되어 있는지 확인하세요.

tools = [
    # WikiSearchTool(),  # 정의되어 있지 않으므로 주석 처리
    DuckDuckGoSearchTool(),
    WebScrapeTool(),
    SaveToFileTool(),
]

system_msg = SystemMessage(
    content=(
        "You are a meticulous security researcher. "
        "When asked to investigate a topic, you should:\n"
        "1) Try DuckDuckGoSearch for sources.\n"
        "2) For each promising result, call WebScrape to pull the full text.\n"
        "3) Organize your findings into a concise report and call SaveResearchFile "
        "to save it as a .txt file.\n"
        "Finish by returning the file path you saved."
    )
)

agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,   # 함수 호출 기반
    handle_parsing_errors=True,
    agent_kwargs={"system_message": system_msg},
    verbose=True,
)


  agent = initialize_agent(


In [10]:
query = "Research about the XZ backdoor"
result = agent.invoke(query)
result   # → {'output': '<저장된 파일 경로>'}




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGoSearch` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3m[{'title': 'XZ Backdoor: “That was a close one” - Xygeni', 'href': 'https://xygeni.io/blog/xz-backdoor-that-was-a-close-one/', 'body': 'May 27, 2025 · XZ Backdoor Attack Explained: Discover the recent discovery of XZ Backdoor malicious behavior, its impact, & ongoing attack handling efforts!'}, {'title': 'xz-utils 백도어 사건 - 나무위키', 'href': 'https://namu.wiki/w/xz-utils+백도어+사건', 'body': 'May 26, 2025 · 2024년 3월 29일, 마이크로소프트 의 PostgreSQL 개발자 안드레스 프런드 (Andres Freund)가 리눅스 liblzma 라이브러리 내 xz 버전 5.6.0, 5.6.1에 백도어 가 심어져 있음을 발견하여 …'}, {'title': 'How one developer prevented the largest cyberattack: the story of the XZ …', 'href': 'https://tekkix.com/articles/security/2025/05/how-one-developer-prevented-the-largest-cyber', 'body': 'May 24, 2025 · Exactly one year has passed since the world watched in awe as the investigation of one of the most sophisticated

  with DDGS() as ddgs:


[32;1m[1;3m
Invoking: `WebScrape` with `{'url': 'https://xygeni.io/blog/xz-backdoor-that-was-a-close-one/'}`


[0m[33;1m[1;3mXZ Backdoor: “That was a close one” | Xygeni
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Products 
 
 
 
 
 
Close Products
 
 
 
 
Open Products
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Code Security
 
 
 
 
 
 
 
 
Open Source Security
 
 
 
 
 
 
 
 
CI/CD Security
 
 
 
 
 
 
 
 
Secrets Security
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Infrastructure as Code (IaC) Security
 
 
 
 
 
 
 
 
Build Security
 
 
 
 
 
 
 
 
Anomaly Detection
 
 
 
 
 
 
 
 
Application Security Posture Management - ASPM 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Malware Across DevOps
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Resources 
 
 
 
 
 
Close Resources
 
 
 
 
Open Resources
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Resource Library
 
 
 
 
 
 
 
 
Datasheets & Product Briefs
 
 
 
 
 
 
 
 
eBooks
 
 
 
 
 
 
 
 
Whitepap

{'input': 'Research about the XZ backdoor',
 'output': 'I have completed the research on the XZ backdoor attack. The detailed information has been saved in a text file. You can download and review the report from the following link: [Download Research Report](sandbox:/4_study/GPT/research_20250717_225108.txt)'}

In [16]:
file_path = "research_20250717_225108.txt"

if os.path.exists(file_path):
    try:
        with open(file_path, encoding="utf-8") as f:
            preview = f.read()[:1500]
        print(preview, "...\n\n[중략]")
    except Exception as e:
        print(f"파일을 열 수 없습니다: {e}")
else:
    print(f"파일이 존재하지 않습니다: {file_path}")


XZ Backdoor: “That was a close one” | Xygeni

Backdooring SSH
A nefarious or compromised maintainer inserted malicious behavior in a library named liblzma, part of the xz compression tools and libraries, resulting in a backdoor in SSH. This is an advanced software supply chain attack as the library was intentionally modified for the backdoor, with obfuscation and stealth techniques for hiding the attack payload from reviewers. It was discovered and disclosed recently (on past Mar 29th), and the attack handling is ongoing. However, it was quickly contained as it seems to affect only pre-release versions of a limited set of environments (DEB and RPM packages, for the x86_64 architecture, and built with GCC). Anyway, the CVE was given a CVSS base score of 10, which is reserved for the most critical cybersecurity vulnerabilities. Should it enter stable distributions, the impact would be overwhelming. The technical analysis of the attack, including the xz backdoor explained in depth, was an