In [28]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore
from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig

In [29]:
# --- Artifact locations ----------------------------------------------------
# Directory that holds the parquet tables read by the loading cells; the
# LanceDB URI is derived from it.
INPUT_DIR = "../output"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

# --- Table names (combined with INPUT_DIR as "<dir>/<table>.parquet") -------
ENTITY_TABLE = "entities"
RELATIONSHIP_TABLE = "relationships"
COMMUNITY_TABLE = "communities"
COMMUNITY_REPORT_TABLE = "community_reports"
TEXT_UNIT_TABLE = "text_units"
# Declared for completeness; no visible cell reads the covariates table.
COVARIATE_TABLE = "covariates"

# --- Query settings ---------------------------------------------------------
# Community level handed to the entity and report readers.
COMMUNITY_LEVEL = 2

In [30]:
# Read the entity and community tables and turn them into entity objects,
# passing COMMUNITY_LEVEL through to the reader.
entity_path = f"{INPUT_DIR}/{ENTITY_TABLE}.parquet"
community_path = f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet"
entity_df = pd.read_parquet(entity_path)
community_df = pd.read_parquet(community_path)

entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)

# Connect to the LanceDB store at LANCEDB_URI using the
# "default-entity-description" index; this store is later handed to the
# context builder as the entity description embeddings.
entity_description_schema = VectorStoreSchemaConfig(
    index_name="default-entity-description"
)
description_embedding_store = LanceDBVectorStore(
    vector_store_schema_config=entity_description_schema,
)
description_embedding_store.connect(db_uri=LANCEDB_URI)

# Row count first, then a rich preview of the raw table as the cell output.
print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 262


Unnamed: 0,id,human_readable_id,title,type,description,text_unit_ids,frequency,degree,x,y
0,79596b43-67e5-49ed-a8d3-1746ce97bfb9,0,SOFTWARE POLICY & RESEARCH INSTITUTE,ORGANIZATION,The Software Policy & Research Institute (SPRi...,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...,2,7,-1.882752,11.430812
1,ea58aa68-8966-4781-a154-a4e1c0c7dbd4,1,"UK DEPARTMENT FOR SCIENCE, INNOVATION AND TECH...",ORGANIZATION,"The UK Department for Science, Innovation and ...",[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...,1,1,-1.966128,10.898143
2,b713e1d0-7dcc-4261-8b36-dd46712a0b6b,2,CHINESE GOVERNMENT,ORGANIZATION,The Chinese Government is the central governin...,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...,1,1,-1.420491,10.252913
3,ee97184b-2efc-40e2-9e34-fbd12ef200d5,3,US FEDERAL COURT,ORGANIZATION,The United States Federal Court is a judicial ...,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...,1,1,-1.464133,11.181699
4,105f6dc8-67f2-4b6a-821b-ae3eb8f48144,4,US FEDERAL PROCUREMENT SERVICE,ORGANIZATION,The US Federal Procurement Service is a govern...,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...,1,1,-1.032638,10.575835


In [31]:
# Relationship table -> relationship objects consumed by the context builder.
relationship_path = f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet"
relationship_df = pd.read_parquet(relationship_path)
relationships = read_indexer_relationships(relationship_df)

# Row count first, then a rich preview of the raw table as the cell output.
print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

Relationship count: 277


Unnamed: 0,id,human_readable_id,source,target,description,weight,combined_degree,text_unit_ids
0,dc3d00b3-1ecf-4dfb-84a3-11964a511eab,0,SOFTWARE POLICY & RESEARCH INSTITUTE,"UK DEPARTMENT FOR SCIENCE, INNOVATION AND TECH...",The Software Policy & Research Institute repor...,7.0,8,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...
1,3749ba1e-cab5-477e-8509-e850be411ce4,1,SOFTWARE POLICY & RESEARCH INSTITUTE,CHINESE GOVERNMENT,The Software Policy & Research Institute cover...,7.0,8,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...
2,08bc9337-5ff9-460e-9614-a88f8bcf3940,2,SOFTWARE POLICY & RESEARCH INSTITUTE,US FEDERAL COURT,The Software Policy & Research Institute repor...,7.0,8,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...
3,ff0bdba1-e6c6-48c7-818a-18ad279b3aab,3,SOFTWARE POLICY & RESEARCH INSTITUTE,US FEDERAL PROCUREMENT SERVICE,The Software Policy & Research Institute repor...,7.0,8,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...
4,960a617e-bd9e-4e2e-b9d3-6bb175c6d7f8,4,SOFTWARE POLICY & RESEARCH INSTITUTE,OECD,The Software Policy & Research Institute repor...,7.0,9,[243941f52781cd785f52d6af493c2efe8dd2948ab29c8...


In [32]:
# Community report table -> report objects. The reader also receives the
# community table loaded earlier and COMMUNITY_LEVEL.
report_path = f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
report_df = pd.read_parquet(report_path)
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)

# Row count first, then a rich preview of the raw table as the cell output.
print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 39


Unnamed: 0,id,human_readable_id,community,level,parent,children,title,summary,full_content,rank,rating_explanation,findings,full_content_json,period,size
0,3712188d005449499a94b1ee5d9d5ec3,12,12,1,0,[],Gangnam Technology and Software Events 2025,"This community centers on Gangnam, a prominent...",# Gangnam Technology and Software Events 2025\...,6.5,The impact severity rating reflects the commun...,[{'explanation': 'Gangnam district in Seoul is...,"{\n ""title"": ""Gangnam Technology and Softwa...",2025-11-20,4
1,5f053034f92b4f7698d3a8c904fcd7f7,13,13,1,0,[],AI·ICT 기술·산업전망 컨퍼런스 and Jung-gu District,This community centers on the AI·ICT 기술·산업전망 컨...,# AI·ICT 기술·산업전망 컨퍼런스 and Jung-gu District\n\n...,4.0,The impact severity rating is moderate due to ...,[{'explanation': 'The AI·ICT 기술·산업전망 컨퍼런스 is a...,"{\n ""title"": ""AI·ICT 기술·산업전망 컨퍼런스 and Jung-...",2025-11-20,2
2,945c3298634d4cdfa91f6d0bf17db51e,14,14,1,0,[],"Seoul Technology Events Community: STK 2025, M...","This community centers around Seoul, South Kor...","# Seoul Technology Events Community: STK 2025,...",6.5,The impact severity rating reflects Seoul's si...,[{'explanation': 'Seoul is the primary entity ...,"{\n ""title"": ""Seoul Technology Events Commu...",2025-11-20,4
3,a180a897c33a4a06bd7f04dc5d9d818d,15,15,1,0,[],Cloud & AI Infrastructure Summit 2025 Korea an...,This community centers on the Cloud & AI Infra...,# Cloud & AI Infrastructure Summit 2025 Korea ...,4.0,The impact severity rating is moderate due to ...,[{'explanation': 'The Cloud & AI Infrastructur...,"{\n ""title"": ""Cloud & AI Infrastructure Sum...",2025-11-20,2
4,845c28fbf6294a3caa5139cd821cf43c,16,16,1,1,[],"Li Qiang, China, and the 2025 World AI Conference",This community centers on the 2025 World AI Co...,"# Li Qiang, China, and the 2025 World AI Confe...",8.5,The impact severity rating is high due to the ...,"[{'explanation': 'Li Qiang, as the Premier of ...","{\n ""title"": ""Li Qiang, China, and the 2025...",2025-11-20,3


In [33]:
# Text unit table -> text unit objects consumed by the context builder.
text_unit_path = f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet"
text_unit_df = pd.read_parquet(text_unit_path)
text_units = read_indexer_text_units(text_unit_df)

# Row count first, then a rich preview of the raw table as the cell output.
print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Text unit records: 32


Unnamed: 0,id,human_readable_id,text,n_tokens,document_ids,entity_ids,relationship_ids,covariate_ids
0,243941f52781cd785f52d6af493c2efe8dd2948ab29c8a...,0,# SPRİ Al Brief\n\n2025년\n9월호\n\n인공지능 산업의 최신 동...,1200,[690599118a757ea3f8c2e7b8979b67af7b5d42a853a1a...,"[79596b43-67e5-49ed-a8d3-1746ce97bfb9, ea58aa6...","[dc3d00b3-1ecf-4dfb-84a3-11964a511eab, 3749ba1...",[]
1,1251fc2335a69c9bd32528281698374173a5c345617456...,1,"조사 결과, 근로자들은 AI 에이전트의 역할에 명확한 기준 설정</td>\n<td...",1200,[690599118a757ea3f8c2e7b8979b67af7b5d42a853a1a...,"[dfdc489c-5835-4a06-b6cc-5018c9e06d33, a03c928...","[46d5d412-d4fc-4f0e-8d25-94061873acb2, 9478333...",[]
2,dc75bf44a931a633edf6e11b56cd20c316df1a8a2c631f...,2,술·지식 교류를 활성화하며 글로벌 AI 환경에서 영국의 전략적 입지를 강화\n\n■...,1200,[690599118a757ea3f8c2e7b8979b67af7b5d42a853a1a...,"[8befdb33-c392-4bed-aabb-069df6f2ba03, 2c94dba...","[483ca2b1-b88a-43b7-a49c-8ef890803cb2, 581f9d5...",[]
3,6b982b72c3a65e707f66b0869d2e2f9ba3a091dcd1d878...,3,"도 발표\n\n\n### ㅇ 중국, 세계 AI 협력기구 설립 통해 AI 글로벌 거버...",1200,[690599118a757ea3f8c2e7b8979b67af7b5d42a853a1a...,"[0f56ce53-982d-4533-86e1-7b8fc5c14465, 424b210...","[1309426f-d73d-4ec4-a926-4e22633d0e11, edcdcb1...",[]
4,8f8c31e3de75cbd53b0cc8cd053bafeb752ee2326cc027...,4,"��육, 교통 등 공공\n서비스에 신뢰할 수 있는 AI 도입을 우선시하고 공공 서비...",1200,[690599118a757ea3f8c2e7b8979b67af7b5d42a853a1a...,"[169e0e82-70ac-41cd-bf9b-b5ceb0481a83, 21e3e7f...","[6f8292bf-f32a-4b73-b19c-84f65035f8c5, a485bc2...",[]


In [34]:
from dotenv import load_dotenv

# Read the .env file two directories up so the AZURE_OPENAI_* values looked up
# via os.environ in the model-setup cell are available. The boolean returned by
# load_dotenv is the cell output.
load_dotenv(dotenv_path="../../.env")

True

In [35]:
from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager

# Azure OpenAI credentials/endpoint come from the environment; a missing
# variable raises KeyError here rather than failing later inside a request.
api_key = os.environ["AZURE_OPENAI_API_KEY"]
azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]

# Deployment/model names are pinned in the notebook; the commented-out lookups
# show the environment variables they could be read from instead.
llm_model = "gpt-4.1-mini"  # os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = "text-embedding-3-small"  # os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Settings shared by the chat and embedding configurations.
azure_common_settings = {
    "api_key": api_key,
    "api_base": azure_endpoint,
    "api_version": "2025-04-01-preview",
    "max_retries": 20,
}

# NOTE(review): the recorded run output warns that fnllm-based ModelType values
# are deprecated in favour of ModelType.Chat / ModelType.Embedding. Migrating is
# left as a follow-up because it changes the provider configuration.
chat_config = LanguageModelConfig(
    type=ModelType.AzureOpenAIChat,
    deployment_name=llm_model,
    model=llm_model,
    **azure_common_settings,
)
chat_model = ModelManager().get_or_create_chat_model(
    name="local_search",
    model_type=ModelType.AzureOpenAIChat,
    config=chat_config,
)

# Tokenizer matching the chat model. No later visible cell references
# token_encoder; it is kept so existing downstream code keeps working.
token_encoder = tiktoken.encoding_for_model(llm_model)

# The embedding configuration must declare the embedding model type. It was
# previously declared as AzureOpenAIChat, which disagreed with the model_type
# used to register the embedder below.
embedding_config = LanguageModelConfig(
    type=ModelType.AzureOpenAIEmbedding,
    deployment_name=embedding_model,
    model=embedding_model,
    **azure_common_settings,
)

text_embedder = ModelManager().get_or_create_embedding_model(
    name="local_search_embedding",
    model_type=ModelType.AzureOpenAIEmbedding,
    config=embedding_config,
)

Model config based on fnllm is deprecated and will be removed in GraphRAG v3, please use ModelType.Chat or ModelType.Embedding instead to switch to LiteLLM config.
Model config based on fnllm is deprecated and will be removed in GraphRAG v3, please use ModelType.Chat or ModelType.Embedding instead to switch to LiteLLM config.


In [37]:
# Assemble the mixed local-search context builder from the artifacts loaded in
# the cells above plus the embedding model.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    # Covariates are intentionally not passed: no `covariates` variable is
    # created in the visible cells. Load them and add `covariates=covariates`
    # only if covariates were produced during indexing.
    text_embedder=text_embedder,
    entity_text_embeddings=description_embedding_store,
    # Use EntityVectorStoreKey.TITLE instead if the vector store uses entity
    # titles as ids.
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
)

In [None]:
# Keyword arguments forwarded to the context builder's build_context().
local_context_params = {
    "text_unit_prop": 0.5,
    "community_prop": 0.1,
    "conversation_history_max_turns": 5,
    "conversation_history_user_turns_only": True,
    "top_k_mapped_entities": 10,
    "top_k_relationships": 10,
    "include_entity_rank": True,
    "include_relationship_weight": True,
    "include_community_rank": False,
    "return_candidate_context": False,
    # Set this to EntityVectorStoreKey.TITLE if the vector store uses entity
    # titles as ids.
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,
    # Context token budget. Change this based on the token limit of your model
    # (with an 8k-limit model, 5000 is a reasonable setting).
    # NOTE(review): recent GraphRAG releases appear to name this build_context
    # argument `max_context_tokens`; there `max_tokens` would be absorbed by
    # **kwargs and the default budget used instead. Both spellings are supplied
    # so the intended budget applies either way -- verify against the installed
    # version and drop the unused key.
    "max_tokens": 12_000,
    "max_context_tokens": 12_000,
}

# Parameters passed to the chat model when generating the answer.
model_params = {
    # Change this based on the token limit of your model (with an 8k-limit
    # model, 1000-1500 is a reasonable setting).
    "max_tokens": 2_000,
    "temperature": 0.0,
}

In [40]:
# Local search engine: chat model + context builder, each with its parameters.
# `response_type` is free-form text describing the desired answer shape and
# format, e.g. "prioritized list", "single paragraph", "multiple paragraphs",
# "multiple-page report".
search_engine = LocalSearch(
    model=chat_model,
    model_params=model_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    response_type="multiple paragraphs",
)

In [44]:
# Search query, in Korean: "Find and explain the entities related to AI trends."
# The leading and trailing newlines of the original triple-quoted form are kept.
prompt = "\nAI trend와 관련된 entity들을 찾아서 설명해줘.\n"

In [45]:
# Let this IPython session run top-level `await` expressions on asyncio.
%autoawait asyncio
# Run the local search for `prompt`; `result` is reused by the inspection
# cells below (response text and context_data tables).
result = await search_engine.search(prompt)
print(result.response)



AI 트렌드와 관련된 주요 엔티티들은 인공지능 기술 발전, 산업 전망, 글로벌 협력, 그리고 AI 에이전트 개발 도구 등 다양한 분야를 포괄합니다. 아래에 주요 엔티티들을 정리하고 설명하겠습니다.

### 1. AI·ICT 기술·산업전망 컨퍼런스
이 컨퍼런스는 11월 12일부터 14일까지 서울 중구에서 개최되며, AI와 ICT 기술 및 산업 전망에 대해 논의하는 중요한 행사입니다. 최신 AI 기술 동향과 산업 발전 방향을 공유하는 장으로, 국내외 전문가들이 모여 지식을 교류합니다. 이 행사는 AI 산업의 현재와 미래를 조망하는 데 중요한 역할을 합니다 [Data: Entities (250); Relationships (262, 266); Sources (31)].

### 2. WORLD AI COOPERATION ORGANIZATION
중국이 제안한 국제 AI 협력 기구로, 2025년 세계 AI 대회에서 처음 소개되었습니다. 이 조직은 AI의 기초 과학 연구, 기술 교류, 인재 공유를 통해 글로벌 협력을 심화하고, AI 혁신과 산업 발전을 촉진하는 것을 목표로 합니다. 또한 AI 표준과 규범을 수립하고, 글로벌 AI 거버넌스를 추진하여 책임 있고 포용적인 AI 개발을 도모합니다. 특히 개발도상국과의 협력도 강조하여 AI 혜택을 전 세계에 확산시키려는 의지를 보이고 있습니다 [Data: Entities (44); Relationships (31, 32, 37)].

### 3. AGENT DEVELOPMENT KIT (ADK)
구글 클라우드가 제공하는 AI 에이전트 개발 도구로, 맞춤형 AI 에이전트를 구축할 수 있도록 지원합니다. 이 툴킷은 AI 에이전트 간 협업을 가능하게 하는 Model Context Protocol(MCP)과 함께 사용되며, 데이터 분석에 특화된 AI 에이전트 제품군의 기반이 됩니다. 구글 클라우드는 이를 통해 기업들이 AI를 보다 효과적으로 활용할 수 있도록 돕고 있습니다 [Data: Entities (132); Relationship

In [46]:
# Entity records returned in the search result's context data (first rows).
context_entities = result.context_data["entities"]
context_entities.head()

Unnamed: 0,id,entity,description,number of relationships,in_context
0,250,AI·ICT 기술·산업전망 컨퍼런스,AI·ICT 기술·산업전망 컨퍼런스 is a conference held Novem...,2,True
1,25,AI AGENT PRODUCTS,,1,True
2,44,WORLD AI COOPERATION ORGANIZATION,The WORLD AI COOPERATION ORGANIZATION is a pro...,3,True
3,132,AGENT DEVELOPMENT KIT (ADK),The Agent Development Kit is a Google Cloud to...,2,True
4,31,BIG TECH AI TALENT COMPETITION,,1,True


In [47]:
# Relationship records returned in the search result's context data (first rows).
context_relationships = result.context_data["relationships"]
context_relationships.head()

Unnamed: 0,id,source,target,description,weight,links,in_context
0,144,GOOGLE CLOUD,AGENT DEVELOPMENT KIT (ADK),Google Cloud provides the Agent Development Ki...,9.0,2,True
1,12,GOOGLE CLOUD,AI AGENT PRODUCTS,Google Cloud announced a suite of AI agent pro...,8.0,2,True
2,31,CHINA,WORLD AI COOPERATION ORGANIZATION,China proposed the establishment of the World ...,18.0,1,True
3,18,SILICON VALLEY,BIG TECH AI TALENT COMPETITION,Silicon Valley's startup culture is threatened...,7.0,1,True
4,262,SEOUL,AI·ICT 기술·산업전망 컨퍼런스,AI·ICT 기술·산업전망 컨퍼런스 is held in Seoul,9.0,1,True


In [48]:
from IPython.display import display

# Community reports are only present in some results, hence the membership
# check. A bare expression nested inside an `if` is not auto-displayed by the
# notebook (only a cell's last top-level expression is), so the preview has to
# be rendered explicitly.
if "reports" in result.context_data:
    display(result.context_data["reports"].head())

In [49]:
# Source text records returned in the search result's context data (first rows).
context_sources = result.context_data["sources"]
context_sources.head()

Unnamed: 0,id,text
0,31,">\n<td rowspan=""2"">10월</td>\n<td>8~9일</td>\n<t..."
1,0,# SPRİ Al Brief\n\n2025년\n9월호\n\n인공지능 산업의 최신 동...
2,2,술·지식 교류를 활성화하며 글로벌 AI 환경에서 영국의 전략적 입지를 강화\n\n■...


In [50]:
# Claims are optional in the context data; print a plain-text preview only
# when the key is present.
if "claims" in result.context_data:
    claims_preview = result.context_data["claims"].head()
    print(claims_preview)

## Question Generation

In [53]:
# Question generator sharing the chat model, context builder and parameter
# dictionaries used by the local search engine.
question_generator = LocalQuestionGen(
    model=chat_model,
    model_params=model_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
)

In [59]:
question_history = [
    "BIG TECH AI TALENT COMPETITION에 대한 설명을 작성해 주세요.",
    "BIG TECH AI Trend에 대한 설명을 작성해 주세요.",
]
candidate_questions = await question_generator.agenerate(
    question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)



['- How are big tech companies impacting the startup ecosystem through their AI talent acquisition strategies?  ', '- What are the recent developments and competitive strategies of Google and Microsoft in the AI sector?  ', '- How is the United States positioned in the global AI landscape in terms of talent, innovation, and investment?  ', '- What role do major AI conferences and events play in shaping the AI industry in 2025?  ', '- How are AI models from leading companies like Google and Microsoft performing in comparison to other advanced models?']


In [60]:
# The generated questions arrive as markdown bullets ("- ...") with trailing
# whitespace, which previously produced lines such as "1. - How ...  ".
# Strip surrounding whitespace and the leading bullet before numbering.
for number, raw_question in enumerate(candidate_questions.response, start=1):
    question_text = raw_question.strip().lstrip("- ")
    print(f"{number}. {question_text}")

1. - How are big tech companies impacting the startup ecosystem through their AI talent acquisition strategies?  
2. - What are the recent developments and competitive strategies of Google and Microsoft in the AI sector?  
3. - How is the United States positioned in the global AI landscape in terms of talent, innovation, and investment?  
4. - What role do major AI conferences and events play in shaping the AI industry in 2025?  
5. - How are AI models from leading companies like Google and Microsoft performing in comparison to other advanced models?


In [None]:
# Build the context directly with context_builder (no answer generation).
# The query is Korean for "AI market changes and trends".
query = "ai 시장변화와 트랜드"
context_result = context_builder.build_context(query=query, **local_context_params)

# Summarise the result: length of context_chunks, the container type and keys
# of context_records, and the usage counters reported on the result object.
records = context_result.context_records
record_keys = records.keys() if hasattr(records, "keys") else "N/A"

print(f"Context chunks length: {len(context_result.context_chunks)}")
print(f"Context records type: {type(records)}")
print(f"Context records keys: {record_keys}")
print(f"\nLLM calls: {context_result.llm_calls}")
print(f"Prompt tokens: {context_result.prompt_tokens}")
print(f"Output tokens: {context_result.output_tokens}")

In [None]:
# Inspect the contents of context_records: for each entry print its key and
# type, then a type-specific summary (DataFrame: shape, columns, first row;
# list: length and first item; anything else: the value itself).
separator = "=" * 60
for record_name, record in context_result.context_records.items():
    print(f"\n{separator}")
    print(f"Key: {record_name}")
    print(f"Type: {type(record)}")

    if isinstance(record, pd.DataFrame):
        print(f"Shape: {record.shape}")
        print(f"Columns: {list(record.columns)}")
        print("\nFirst record:")
        print(record.head(1))
        continue

    if isinstance(record, list):
        print(f"Length: {len(record)}")
        if record:
            first_item = record[0]
            print(f"First item type: {type(first_item)}")
            print(f"First item: {first_item}")
        continue

    print(f"Value: {record}")

In [None]:
# Preview context_chunks (the text that is actually passed to the LLM).
PREVIEW_CHARS = 1000

raw_chunks = context_result.context_chunks
# The character limit below only makes sense on text. NOTE(review):
# context_chunks is presumably a single string for this builder, but if a list
# of chunk strings is returned, join it first so the slice still counts
# characters rather than list items -- confirm against the installed version.
if isinstance(raw_chunks, str):
    context_text = raw_chunks
else:
    context_text = "\n\n".join(str(chunk) for chunk in raw_chunks)

print(f"Context Chunks (first {PREVIEW_CHARS} chars):")
print("=" * 60)
print(context_text[:PREVIEW_CHARS])
# Only signal truncation when something was actually cut off.
if len(context_text) > PREVIEW_CHARS:
    print("...")