In [1]:
# %pip installs into the running kernel's environment; the version is pinned to
# the one this notebook was last executed with so re-runs stay reproducible.
%pip install smolagents==1.15.0

Collecting smolagents
  Downloading smolagents-1.15.0-py3-none-any.whl.metadata (15 kB)
Collecting python-dotenv (from smolagents)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading smolagents-1.15.0-py3-none-any.whl (124 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.3/124.3 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv, smolagents
Successfully installed python-dotenv-1.1.0 smolagents-1.15.0


In [2]:
# Pinned to the version recorded in this cell's output; %pip targets the kernel's environment.
%pip install langchain_community==0.3.24

Collecting langchain_community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.1.0-py3-no

In [3]:
# rank_bm25 backs the BM25 retriever; both packages are pinned to the versions
# this notebook was last executed with.
%pip install langchain-community==0.3.24 rank_bm25==0.2.2

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [4]:
# Pinned to the version recorded in this cell's output; %pip targets the kernel's environment.
%pip install faiss-cpu==1.11.0

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [8]:
# pypdf is the PDF parser used by PyPDFLoader; pinned to the version recorded in
# this cell's output.
%pip install pypdf==5.5.0

Collecting pypdf
  Downloading pypdf-5.5.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.5.0-py3-none-any.whl (303 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.4/303.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.5.0


In [5]:
# Authenticate with the Hugging Face Hub. notebook_login() renders an interactive
# widget for entering an access token, so no token is hardcoded in the notebook.
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [13]:
import os
import pickle

from smolagents import Tool
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


class KnowledgeRetrieverTool(Tool):
    """Hybrid BM25 + FAISS document retriever exposed as a smolagents tool.

    ``get_embeddings`` indexes a PDF and persists both indexes to disk;
    ``forward`` (the tool entry point) answers a question with the best
    matching chunks, loading the persisted indexes on first use if they have
    not been built in this session.
    """

    name = "knowledge_retriever"
    description = "Uses FAISS and BM25 to retrieve relevant documents for given question"
    inputs = {
        "question": {
            "type": "string",
            "description": "User's question",
        }
    }
    output_type = "string"

    def __init__(self, faiss_path="faiss_index", bm25_path="bm25.pkl", model_name="sentence-transformers/all-MiniLM-L6-v2", top_k=4, **kwargs):
        """Configure storage locations, the embedding model and the result count.

        Args:
            faiss_path: Directory the FAISS index is saved to / loaded from.
            bm25_path: File the pickled BM25 retriever is saved to / loaded from.
            model_name: Hugging Face model id used to embed chunks and queries.
            top_k: Maximum number of documents returned per question.
            **kwargs: Forwarded to ``Tool.__init__``.
        """
        super().__init__(**kwargs)
        self.faiss_path = faiss_path
        self.bm25_path = bm25_path
        self.embedding_model = HuggingFaceEmbeddings(model_name=model_name)
        self.top_k = top_k
        self.faiss_index = None
        self.bm25_retriever = None
        self.ensemble_retriever = None

    def _build_ensemble(self):
        """Return an equally weighted BM25 + FAISS ensemble capped at ``top_k`` hits each."""
        # Keyword arguments passed to EnsembleRetriever.invoke() are not
        # forwarded to the member retrievers, so the result count has to be
        # configured on each retriever instead.
        self.bm25_retriever.k = self.top_k
        return EnsembleRetriever(
            retrievers=[
                self.bm25_retriever,
                self.faiss_index.as_retriever(search_kwargs={"k": self.top_k}),
            ],
            weights=[0.5, 0.5],
        )

    def _load_persisted_indexes(self):
        """Load whichever index is still missing from disk, if its file exists."""
        if self.faiss_index is None and os.path.exists(self.faiss_path):
            # SECURITY: the FAISS docstore is pickled, so load_local refuses to
            # run without this opt-in. Only point faiss_path at an index that
            # this tool wrote itself via get_embeddings().
            self.faiss_index = FAISS.load_local(
                self.faiss_path,
                self.embedding_model,
                allow_dangerous_deserialization=True,
            )

        if self.bm25_retriever is None and os.path.exists(self.bm25_path):
            # SECURITY: pickle.load executes arbitrary code from the file;
            # never load a bm25_path obtained from an untrusted source.
            with open(self.bm25_path, "rb") as f:
                self.bm25_retriever = pickle.load(f)

    def get_embeddings(self, pdf_path):
        """Index a PDF: split it into chunks, build both indexes and persist them.

        Args:
            pdf_path: Path of the PDF file to index.

        Raises:
            ValueError: If no text chunks could be extracted from the PDF.
        """
        loader = PyPDFLoader(pdf_path)
        docs = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
        split_docs = splitter.split_documents(docs)
        if not split_docs:
            # Fail here with a clear message instead of deep inside FAISS/BM25.
            raise ValueError(f"No text could be extracted from {pdf_path!r}; nothing to index.")

        self.faiss_index = FAISS.from_documents(split_docs, self.embedding_model)
        self.faiss_index.save_local(self.faiss_path)

        self.bm25_retriever = BM25Retriever.from_documents(split_docs, k=self.top_k)
        with open(self.bm25_path, "wb") as f:
            pickle.dump(self.bm25_retriever, f)

        self.ensemble_retriever = self._build_ensemble()

    def invoke(self, question):
        """Return at most ``top_k`` documents relevant to ``question``.

        Args:
            question: Natural-language query.

        Returns:
            The fused BM25/FAISS ranking, truncated to ``top_k`` documents.

        Raises:
            RuntimeError: If no index was built in this session and none can
                be loaded from ``faiss_path`` / ``bm25_path``.
        """
        if self.ensemble_retriever is None:
            self._load_persisted_indexes()
            if self.faiss_index is None or self.bm25_retriever is None:
                raise RuntimeError(
                    "No retrieval index available: call get_embeddings(pdf_path) first, "
                    f"or make sure {self.faiss_path!r} and {self.bm25_path!r} exist."
                )
            self.ensemble_retriever = self._build_ensemble()

        docs = self.ensemble_retriever.invoke(question)
        # The ensemble returns the union of both retrievers' hits (up to
        # 2 * top_k); keep only the best top_k of the fused ranking.
        return docs[: self.top_k]

    def forward(self, question):
        """Tool entry point: format the retrieved chunks as one string.

        Args:
            question: Natural-language query supplied by the agent.

        Returns:
            A header line followed by one numbered section per retrieved chunk.
        """
        docs = self.invoke(question)
        return "\nRetrieved ideas:\n" + "".join(
            f"\n\n===== Idea {i} =====\n" + doc.page_content
            for i, doc in enumerate(docs)
        )

In [14]:
# Instantiate the tool with its default index locations and embedding model.
retriever = KnowledgeRetrieverTool()

# Index the PDF; this also writes the FAISS and BM25 indexes to disk.
retriever.get_embeddings(pdf_path="test.pdf")

21
<class 'list'>
page_content='2120
ESG Special Report 2023
투자 검토 단계
Pre-Acquisition (인수 전)
01
포트폴리오 ESG 관리 체계
장기적 관점에서 기업가치 제고를 실현하기 위해 핵심자산인 
투자 포트폴리오의 경제적 가치와 함께 ESG 가치를 
통합적으로 관리하기 위한 체계를 구축하고 있습니다.
투자 검토 시점부터 인수 후, 회수 시점까지 투자
Life Cycle에 걸쳐 적용되는 체계적인 ESG 관리를 
기반으로 내부적으로는 ESG를 고려한 합리적인 투자의사 
결정을 이행하고, 시장에서는 포트폴리오의 기업가치가 
시장에서 제대로 평가받으며 나아가 사회·환경에 미치는 
파급력을 높일 수 있도록 노력하겠습니다.
포트폴리오 ESG 관리 원칙
SK주식회사 투자회사
기업가치 관점의
ESG 중점관리 
항목 도출
자사 ESG 
관리전략
ESG 성과 
데이터 관리
기업가치와
ESG 성과 
연계성 분석
포트폴리오 
ESG 관리전략 
Upgrade
성장단계
산업특성
ESG Divestment 전략 검토
       ESG Exit 리포트 발간
 ·    인수 이후 ESG Value-up 기반 Exit 전략 도출
 ·    중대 ESG 리스크/기회 현황 및   
ESG 관리·공시 수준 확인
셀사이드(Sell-side) 점검사항 관리
       중대 ESG 이슈 존재 여부 검토
 ·    매각 대상 시장 내 ESG 규제 준수 여부 확인
 ·    ESG 우수 영역에 대한 정보공개 및    
기회 확대 방안 제시
 ·    국내외 책임투자 기준 부합 여부 확인
 ·    우수 관리 영역 정보공개 및   
이해관계자 커뮤니케이션
매각/투자 회수 단계
03
Exit (투자 회수)
정기 ESG 점검
       투자회사 분류 
 ·   전체 포트폴리오를 16개 업종, 기업 규모에 따라 3개 그룹으로 구분
       ESG 중점관리 항목 도출 
  ·   ESG 외부평가 및 주가 상관관계 상위 영역 분석에 따라   
산

In [15]:
from smolagents import CodeAgent

# HfApiModel was renamed to InferenceClientModel in newer smolagents releases and
# the old name is deprecated. Prefer the new class, but keep the HfApiModel
# binding so this cell works on either version.
try:
    from smolagents import InferenceClientModel as HfApiModel
except ImportError:  # older smolagents that only ships HfApiModel
    from smolagents import HfApiModel

agent = CodeAgent(tools=[retriever], model=HfApiModel())

# NOTE(review): the recorded answer below reads like generic model knowledge
# rather than content from the indexed PDF -- confirm the agent actually calls
# the knowledge_retriever tool for this prompt.
response = agent.run(
    "ESG 체계에 관해 설명해줘"
)

print(response)

ESG (Environmental, Social, and Governance) is a framework used by investors, companies, and policymakers to evaluate the sustainability and ethical impact of an organization's operations. The three pillars of ESG are:

1. Environmental: This includes factors such as a company's carbon emissions, resource use, waste management, and efforts to mitigate climate change.

2. Social: This encompasses labor practices, human rights, stakeholder relationships, community engagement, and product safety.

3. Governance: This involves the performance of a company's board of directors, audit committees, executive remuneration, internal controls, and shareholder rights.

ESG criteria are used to assess a company's risk profile and long-term sustainability, providing a more holistic view of its operations beyond traditional financial metrics.
