In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

In [2]:
INPUT_DIR = "../research/output"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

COMMUNITY_REPORT_TABLE = "community_reports"
ENTITY_TABLE = "entities"
COMMUNITY_TABLE = "communities"
RELATIONSHIP_TABLE = "relationships"
COVARIATE_TABLE = "covariates"
TEXT_UNIT_TABLE = "text_units"
COMMUNITY_LEVEL = 2

In [3]:
# read nodes table to get community and degree data
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")

entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)

# load description embeddings to an in-memory lancedb vectorstore
# to connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="default-entity-description",
)
description_embedding_store.connect(db_uri=LANCEDB_URI)

print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 5971


Unnamed: 0,id,human_readable_id,title,type,description,text_unit_ids,frequency,degree,x,y
0,18f12a73-db97-4c91-9312-8a6c548e7fac,0,CRÉDIT MUTUEL ALLIANCE FÉDÉRALE,ORGANIZATION,Crédit Mutuel Alliance Fédérale is a major Fre...,[421aad582796f2743e2784a232baa6563bd81ded1b542...,34,241,15.752939,5.296056
1,051dc275-fc78-420a-be27-de733ef9ccab,1,BANQUE FÉDÉRATIVE DU CRÉDIT MUTUEL,ORGANIZATION,Banque Fédérative du Crédit Mutuel (BFCM) is t...,[421aad582796f2743e2784a232baa6563bd81ded1b542...,2,7,13.973948,6.359158
2,ec1d5ab0-e277-42d5-8a54-ec9d38b40001,2,CAISSE FÉDÉRALE CRÉDIT MUTUEL,ORGANIZATION,The Caisse Fédérale de Crédit Mutuel is the ce...,[421aad582796f2743e2784a232baa6563bd81ded1b542...,1,1,14.28328,5.117889
3,c6543b68-1d95-4890-853c-83bb7634e035,3,CRÉDIT MUTUEL LOCAL BANKS,ORGANIZATION,CRÉDIT MUTUEL LOCAL BANKS constitute a network...,[421aad582796f2743e2784a232baa6563bd81ded1b542...,2,5,13.022546,5.529476
4,46c2c787-9d00-47c8-aef4-02c8b7ec58d7,4,CRÉDIT MUTUEL FEDERATIONS,ORGANIZATION,Fourteen regional federations of Crédit Mutuel...,[421aad582796f2743e2784a232baa6563bd81ded1b542...,1,17,9.054967,-12.5165


In [4]:
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(relationship_df)

print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

Relationship count: 10318


Unnamed: 0,id,human_readable_id,source,target,description,weight,combined_degree,text_unit_ids
0,6f34d618-4842-4043-9aa9-5e4b49c4c938,0,CRÉDIT MUTUEL ALLIANCE FÉDÉRALE,CAISSE FÉDÉRALE CRÉDIT MUTUEL,Crédit Mutuel Alliance Fédérale represents the...,9.0,242,[421aad582796f2743e2784a232baa6563bd81ded1b542...
1,08fba875-b1eb-4a9d-95d4-f7c50f7b7a0f,1,CRÉDIT MUTUEL ALLIANCE FÉDÉRALE,CRÉDIT MUTUEL LOCAL BANKS,Crédit Mutuel Alliance Fédérale is a major coo...,19.0,246,[421aad582796f2743e2784a232baa6563bd81ded1b542...
2,53c0856f-828b-456e-86ed-db5be90c17b6,2,CRÉDIT MUTUEL ALLIANCE FÉDÉRALE,CRÉDIT MUTUEL FEDERATIONS,Crédit Mutuel Alliance Fédérale is composed of...,9.0,258,[421aad582796f2743e2784a232baa6563bd81ded1b542...
3,b8b7e3fb-e6c9-42fa-810c-8c81daaebde4,3,CRÉDIT MUTUEL ALLIANCE FÉDÉRALE,BANQUE FÉDÉRATIVE DU CRÉDIT MUTUEL,CRÉDIT MUTUEL ALLIANCE FÉDÉRALE is a consolida...,18.0,248,[421aad582796f2743e2784a232baa6563bd81ded1b542...
4,dae4394f-48d3-4ca9-97f8-a84449f49a99,4,CRÉDIT MUTUEL ALLIANCE FÉDÉRALE,GROUPE LA FRANÇAISE,Crédit Mutuel Alliance Fédérale is a major fin...,17.0,254,[421aad582796f2743e2784a232baa6563bd81ded1b542...


In [5]:
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)

print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 997


Unnamed: 0,id,human_readable_id,community,level,parent,children,title,summary,full_content,rank,rating_explanation,findings,full_content_json,period,size
0,219aa0adad0a4c2a8c257a9a8fc9a25b,995,995,6,993,[],Societe Generale Group Financial and Risk Mana...,This community centers on the Societe Generale...,# Societe Generale Group Financial and Risk Ma...,8.2,The high impact severity rating reflects the G...,[{'explanation': 'The Societe Generale Group (...,"{\n ""title"": ""Societe Generale Group Financ...",2025-09-20,98
1,2b5db159c9df468e9d98c24171df8529,996,996,6,993,[],Global Markets Business Units and Global Banki...,This community consists of two key business un...,# Global Markets Business Units and Global Ban...,6.5,The impact severity rating is moderately high ...,[{'explanation': 'The Global Markets Business ...,"{\n ""title"": ""Global Markets Business Units...",2025-09-20,2
2,ba30bb594eae4ffa9ac7148a972acb3e,993,993,5,989,"[995, 996]",Societe Generale Group Financial and Risk Mana...,This community centers on the Societe Generale...,# Societe Generale Group Financial and Risk Ma...,8.5,The high impact severity rating reflects the G...,[{'explanation': 'The Societe Generale Group i...,"{\n ""title"": ""Societe Generale Group Financ...",2025-09-20,100
3,9fba254b36864ffead70f03c116383ad,994,994,5,989,[],Derecognition and Pass-Through Agreement,This community centers on the financial accoun...,# Derecognition and Pass-Through Agreement\n\n...,4.2,The impact severity rating is moderate due to ...,[{'explanation': 'Derecognition is a fundament...,"{\n ""title"": ""Derecognition and Pass-Throug...",2025-09-20,2
4,a7eb55d0b848464d8efe29e8cd8acf86,985,985,4,880,[],Germany's Financial and Real Estate Ecosystem ...,This community centers on Germany as a pivotal...,# Germany's Financial and Real Estate Ecosyste...,8.2,The impact severity rating is high due to Germ...,[{'explanation': 'Germany is a key member of t...,"{\n ""title"": ""Germany's Financial and Real ...",2025-09-20,12


In [6]:
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)

print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Text unit records: 838


Unnamed: 0,id,human_readable_id,text,n_tokens,document_ids,entity_ids,relationship_ids,covariate_ids
0,421aad582796f2743e2784a232baa6563bd81ded1b542c...,1,# Crédit Mutuel Alliance Fédérale\n\n2024 Full...,1200,[126f5403b70327b0ba8be51e9618c75047bea0f4e305b...,"[18f12a73-db97-4c91-9312-8a6c548e7fac, 051dc27...","[6f34d618-4842-4043-9aa9-5e4b49c4c938, 08fba87...",[]
1,9ac44bd55996370afdff6eec83d65548c64b351a36ecfd...,2,EURO GDS España\n\nASSET MANAGEMENT\nGroupe L...,1200,[126f5403b70327b0ba8be51e9618c75047bea0f4e305b...,"[18f12a73-db97-4c91-9312-8a6c548e7fac, f3efde6...","[31e82175-f57b-4f9e-ac3b-ede484310476, 19555fe...",[]
2,8dfdd4ed734b2a5699ff115e6a86b012fc638840cf657c...,3,FRANÇAISE\n\nmonabanq\n\nCrédit Mutuel Avance...,1200,[126f5403b70327b0ba8be51e9618c75047bea0f4e305b...,"[18f12a73-db97-4c91-9312-8a6c548e7fac, f3efde6...","[95f76143-cb6f-4edf-9b0a-854af006df51, 5e2aec7...",[]
3,c8aeecdc8cdf302eaa44f30185391b9a43796892e5c8b0...,4,result\n\n1%\nof net\nrevenue\n\n5%\nof net\nr...,1200,[126f5403b70327b0ba8be51e9618c75047bea0f4e305b...,"[18f12a73-db97-4c91-9312-8a6c548e7fac, ce27fa7...","[c9798b83-c937-4e62-8461-a425bd63913d, 756865f...",[]
4,8680e30c03d1ba1e4502d81fcb5bb200dba3e483a148db...,5,td>\n<td>-36.2%</td>\n</tr>\n<tr>\n<td>Income ...,1200,[126f5403b70327b0ba8be51e9618c75047bea0f4e305b...,"[18f12a73-db97-4c91-9312-8a6c548e7fac, ce27fa7...","[c9798b83-c937-4e62-8461-a425bd63913d, a35a40d...",[]


In [7]:
from dotenv import load_dotenv
load_dotenv("../.env")

True

In [8]:
from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager

api_key = os.environ["AZURE_OPENAI_API_KEY"]
llm_model = "gpt-4.1-mini" #os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = "text-embedding-3-small" #os.environ["GRAPHRAG_EMBEDDING_MODEL"]

chat_config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.AzureOpenAIChat,
    api_base=os.environ['AZURE_OPENAI_ENDPOINT'],
    api_version="2025-04-01-preview",
    deployment_name=llm_model,
    model=llm_model,
    max_retries=20,
)
chat_model = ModelManager().get_or_create_chat_model(
    name="local_search",
    model_type=ModelType.AzureOpenAIChat,
    config=chat_config,
)

token_encoder = tiktoken.encoding_for_model(llm_model)

embedding_config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.AzureOpenAIChat,
    api_base=os.environ['AZURE_OPENAI_ENDPOINT'],
    api_version="2025-04-01-preview",
    deployment_name=embedding_model,
    model=embedding_model,
    max_retries=20,
)

text_embedder = ModelManager().get_or_create_embedding_model(
    name="local_search_embedding",
    model_type=ModelType.AzureOpenAIEmbedding,
    config=embedding_config,
)

In [9]:
context_builder = LocalSearchMixedContext(
    community_reports=reports,
    text_units=text_units,
    entities=entities,
    relationships=relationships,
    # if you did not run covariates during indexing, set this to None
    #covariates=covariates,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,  # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

In [10]:
# text_unit_prop: proportion of context window dedicated to related text units
# community_prop: proportion of context window dedicated to community reports.
# The remaining proportion is dedicated to entities and relationships. Sum of text_unit_prop and community_prop should be <= 1
# conversation_history_max_turns: maximum number of turns to include in the conversation history.
# conversation_history_user_turns_only: if True, only include user queries in the conversation history.
# top_k_mapped_entities: number of related entities to retrieve from the entity description embedding store.
# top_k_relationships: control the number of out-of-network relationships to pull into the context window.
# include_entity_rank: if True, include the entity rank in the entity table in the context window. Default entity rank = node degree.
# include_relationship_weight: if True, include the relationship weight in the context window.
# include_community_rank: if True, include the community rank in the context window.
# return_candidate_context: if True, return a set of dataframes containing all candidate entity/relationship/covariate records that
# could be relevant. Note that not all of these records will be included in the context window. The "in_context" column in these
# dataframes indicates whether the record is included in the context window.
# max_tokens: maximum number of tokens to use for the context window.


local_context_params = {
    "text_unit_prop": 0.5,
    "community_prop": 0.1,
    "conversation_history_max_turns": 5,
    "conversation_history_user_turns_only": True,
    "top_k_mapped_entities": 10,
    "top_k_relationships": 10,
    "include_entity_rank": True,
    "include_relationship_weight": True,
    "include_community_rank": False,
    "return_candidate_context": False,
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,  # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    "max_tokens": 12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
}

model_params = {
    "max_tokens": 2_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000=1500)
    "temperature": 0.0,
}

In [11]:
search_engine = LocalSearch(
    model=chat_model,
    context_builder=context_builder,
    token_encoder=token_encoder,
    model_params=model_params,
    context_builder_params=local_context_params,
    response_type="multiple paragraphs",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)

In [12]:
prompt = """프랑스 은행 BNP 에 대해서 아래의 보고서 형태로 작성해 주시오.

# 보고서 항목
주요 리스크 요인
- 산업 구조적 리스크 
- 규제 변화로 인한 수익성 악화 가능성
- 기업별 특수 리스크

# 언어 및 스타일
- 한국어로 간결하고 명확하게 작성하십시오
- 최대한 객관적이되 통찰력 있는 시각으로 작성하십시오.
- 대한민국 경제 뉴스 브리핑이나 토론에서 사용하는 전문적인 단어를 사용하십시오.
"""

In [13]:
%autoawait asyncio
result = await search_engine.search(prompt)
print(result.response)

# BNP 파리바 주요 리스크 요인 보고서

## 1. 산업 구조적 리스크

BNP 파리바는 프랑스 및 글로벌 금융시장에서 주요한 위치를 차지하는 대형 은행으로, 다양한 금융 서비스와 광범위한 사업 포트폴리오를 보유하고 있습니다. 그러나 금융산업 전반에 내재된 구조적 리스크는 BNP 파리바에도 예외가 아닙니다. 우선, 글로벌 금융시장의 경쟁 심화와 디지털 전환 가속화는 전통적 은행 비즈니스 모델의 수익성에 지속적인 압박을 가하고 있습니다. 특히, 저금리 환경이 장기화됨에 따라 순이자마진(NIM) 축소가 불가피하며, 이는 은행의 핵심 수익원 약화로 이어질 수 있습니다. 또한, 금융산업 내 자산 및 수익의 다변화에도 불구하고, 글로벌 경제 불확실성과 지정학적 리스크(예: 우크라이나 및 중동 지역 분쟁)는 자산 건전성에 부정적 영향을 미칠 가능성이 존재합니다.

BNP 파리바는 다양한 지역과 사업부문에 걸쳐 영업하고 있으나, 각 지역별 경제 상황과 규제 환경 차이로 인한 운영 복잡성도 구조적 리스크로 작용합니다. 예를 들어, 유럽 내 주요 시장과 신흥시장 간의 경제성장률 차이, 환율 변동성, 그리고 각국의 금융 규제 차이는 그룹 전체의 리스크 관리에 도전 과제를 제공합니다.

## 2. 규제 변화로 인한 수익성 악화 가능성

BNP 파리바는 유럽중앙은행(ECB) 및 유럽연합(EU) 금융 규제 하에 엄격한 자본 및 유동성 요건을 준수하고 있습니다. 최근 강화된 바젤 III 규제와 MREL(최소 요구 자기자본 및 부채) 및 TLAC(총 손실흡수능력) 규제는 은행의 자본 조달 비용 상승과 자본 운용의 경직성을 초래할 수 있습니다. 특히, 추가적인 규제 강화나 새로운 회계 기준 도입 시, 자본비용 증가와 함께 수익성 저하가 불가피할 수 있습니다.

또한, 2024년부터 시행된 OECD의 글로벌 최저 법인세율 도입과 EU의 관련 지침은 BNP 파리바의 세무 전략과 순이익에 영향을 미칠 수 있습니다. 이와 함께, 프랑스 내 연금 개혁과 노동 관련 규제 변화는 인건비 및 운영비

In [14]:
result.context_data["entities"].head()

Unnamed: 0,id,entity,description,number of relationships,in_context
0,1639,FINANCIAL STATEMENTS,The financial statements of BNP Paribas consis...,1,True
1,1644,NOTES TO THE FINANCIAL STATEMENTS,Detailed explanatory notes accompanying BNP Pa...,1,True
2,848,BNPP,BNP Paribas (BNPP) is a leading global banking...,69,True
3,1780,BNP PARIBAS SA,BNP Paribas SA is a major French multinational...,72,True
4,1178,BNP PARIBAS GROUP,BNP Paribas Group is a major multinational ban...,91,True


In [15]:
result.context_data["relationships"].head()

Unnamed: 0,id,source,target,description,weight,links,in_context
0,2983,FINANCIAL STATEMENTS,GROUP,The Group prepares consolidated financial stat...,9.0,1,True
1,2853,CONSOLIDATED FINANCIAL STATEMENTS,NOTES TO THE FINANCIAL STATEMENTS,The notes provide detailed explanations for th...,1.0,1,True


In [16]:
if "reports" in result.context_data:
    result.context_data["reports"].head()

In [17]:
result.context_data["sources"].head()

Unnamed: 0,id,text
0,245,"inflationary countries, including equity, are\..."
1,229,<figure>\n</figure>\n\n\n# CONSOLIDATED FINANC...
2,122,about 1.3x as of end 2024. The regulatory ave...


In [18]:
if "claims" in result.context_data:
    print(result.context_data["claims"].head())

## Question Generation

In [19]:
question_generator = LocalQuestionGen(
    model=chat_model,
    context_builder=context_builder,
    token_encoder=token_encoder,
    model_params=model_params,
    context_builder_params=local_context_params,
)

In [20]:
question_history = [
    "프랑스 은행 BNP 에 대해서 보고서를 작성해 주시오.",
]
candidate_questions = await question_generator.agenerate(
    question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)

['- What are the main business divisions of BNP Paribas and how do they contribute to its overall financial performance?', "- How has BNP Paribas's revenue and net income evolved in recent years, and what are the key drivers behind this performance?", "- What is the geographic distribution of BNP Paribas's revenues and customer base, particularly focusing on its operations in France?", '- How does BNP Paribas manage its capital and liquidity to comply with regulatory requirements and ensure financial stability?', "- What are BNP Paribas's strategic priorities regarding sustainability and digital innovation in its banking operations?"]


In [22]:
for i, q in enumerate(candidate_questions.response):
    print(f"{i+1}. {q}")

1. - What are the main business divisions of BNP Paribas and how do they contribute to its overall financial performance?
2. - How has BNP Paribas's revenue and net income evolved in recent years, and what are the key drivers behind this performance?
3. - What is the geographic distribution of BNP Paribas's revenues and customer base, particularly focusing on its operations in France?
4. - How does BNP Paribas manage its capital and liquidity to comply with regulatory requirements and ensure financial stability?
5. - What are BNP Paribas's strategic priorities regarding sustainability and digital innovation in its banking operations?
