In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager
from graphrag.query.indexer_adapters import (
    read_indexer_communities,
    read_indexer_entities,
    read_indexer_reports,
)
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

In [2]:
from dotenv import load_dotenv

# Load environment variables from the .env file two directories above this notebook.
# The boolean returned by load_dotenv is the cell's displayed output.
load_dotenv(dotenv_path="../../.env")

True

In [3]:
# Azure OpenAI credentials come from the environment; a missing variable raises
# KeyError right here rather than surfacing later inside the client.
api_key = os.environ["AZURE_OPENAI_API_KEY"]

# The same string is used as the Azure deployment name and as the model name below.
# To take it from the environment instead, use os.environ["GRAPHRAG_LLM_MODEL"].
llm_model = "gpt-4.1-mini"

# NOTE(review): running this cell prints a GraphRAG deprecation warning saying that
# fnllm-based model config will be removed in v3 and recommending ModelType.Chat.
config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.AzureOpenAIChat,
    api_base=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version="2025-04-01-preview",
    deployment_name=llm_model,
    model=llm_model,
    max_retries=20,
)
model = ModelManager().get_or_create_chat_model(
    name="global_search",
    model_type=ModelType.AzureOpenAIChat,
    config=config,
)

# encoding_for_model raises KeyError when the installed tiktoken release has no
# mapping for the model name (older releases do not know "gpt-4.1-mini").
# Fall back to o200k_base, the encoding current tiktoken maps the GPT-4.1 family to,
# so the cell does not abort on an older tiktoken.
try:
    token_encoder = tiktoken.encoding_for_model(llm_model)
except KeyError:
    token_encoder = tiktoken.get_encoding("o200k_base")

Model config based on fnllm is deprecated and will be removed in GraphRAG v3, please use ModelType.Chat or ModelType.Embedding instead to switch to LiteLLM config.


In [4]:
# Level in the Leiden community hierarchy whose community reports are loaded.
# A higher value selects more fine-grained communities, which costs more computation.
COMMUNITY_LEVEL = 2

# Directory holding the parquet files produced by the indexing pipeline,
# followed by the base names of the tables read from it.
INPUT_DIR = "../output"
ENTITY_TABLE = "entities"
COMMUNITY_TABLE = "communities"
COMMUNITY_REPORT_TABLE = "community_reports"

In [5]:
# Read the three indexer output tables (communities, entities, reports) in that order.
community_df, entity_df, report_df = (
    pd.read_parquet(f"{INPUT_DIR}/{table_name}.parquet")
    for table_name in (COMMUNITY_TABLE, ENTITY_TABLE, COMMUNITY_REPORT_TABLE)
)

# Convert the raw frames into the objects the query engine consumes; reports and
# entities are restricted by COMMUNITY_LEVEL.
communities = read_indexer_communities(community_df, report_df)
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)

# Compare the raw report count with the count left after level filtering.
print(f"Total report count: {len(report_df)}")
print(f"Report count after filtering by community level {COMMUNITY_LEVEL}: {len(reports)}")

# Preview of the raw report table (cell output).
report_df.head()

Total report count: 39
Report count after filtering by community level 2: 39


Unnamed: 0,id,human_readable_id,community,level,parent,children,title,summary,full_content,rank,rating_explanation,findings,full_content_json,period,size
0,3712188d005449499a94b1ee5d9d5ec3,12,12,1,0,[],Gangnam Technology and Software Events 2025,"This community centers on Gangnam, a prominent...",# Gangnam Technology and Software Events 2025\...,6.5,The impact severity rating reflects the commun...,[{'explanation': 'Gangnam district in Seoul is...,"{\n ""title"": ""Gangnam Technology and Softwa...",2025-11-20,4
1,5f053034f92b4f7698d3a8c904fcd7f7,13,13,1,0,[],AI·ICT 기술·산업전망 컨퍼런스 and Jung-gu District,This community centers on the AI·ICT 기술·산업전망 컨...,# AI·ICT 기술·산업전망 컨퍼런스 and Jung-gu District\n\n...,4.0,The impact severity rating is moderate due to ...,[{'explanation': 'The AI·ICT 기술·산업전망 컨퍼런스 is a...,"{\n ""title"": ""AI·ICT 기술·산업전망 컨퍼런스 and Jung-...",2025-11-20,2
2,945c3298634d4cdfa91f6d0bf17db51e,14,14,1,0,[],"Seoul Technology Events Community: STK 2025, M...","This community centers around Seoul, South Kor...","# Seoul Technology Events Community: STK 2025,...",6.5,The impact severity rating reflects Seoul's si...,[{'explanation': 'Seoul is the primary entity ...,"{\n ""title"": ""Seoul Technology Events Commu...",2025-11-20,4
3,a180a897c33a4a06bd7f04dc5d9d818d,15,15,1,0,[],Cloud & AI Infrastructure Summit 2025 Korea an...,This community centers on the Cloud & AI Infra...,# Cloud & AI Infrastructure Summit 2025 Korea ...,4.0,The impact severity rating is moderate due to ...,[{'explanation': 'The Cloud & AI Infrastructur...,"{\n ""title"": ""Cloud & AI Infrastructure Sum...",2025-11-20,2
4,845c28fbf6294a3caa5139cd821cf43c,16,16,1,1,[],"Li Qiang, China, and the 2025 World AI Conference",This community centers on the 2025 World AI Co...,"# Li Qiang, China, and the 2025 World AI Confe...",8.5,The impact severity rating is high due to the ...,"[{'explanation': 'Li Qiang, as the Premier of ...","{\n ""title"": ""Li Qiang, China, and the 2025...",2025-11-20,3


In [6]:
# Context builder over the loaded reports and communities.
# `entities` is optional: omit it (it defaults to None) if community weights
# should not be used for ranking.
context_builder = GlobalCommunityContext(
    entities=entities,
    communities=communities,
    community_reports=reports,
)

In [7]:
# Options forwarded to the context builder.
context_builder_params = dict(
    # False -> use the full community reports; True -> use the short community summaries.
    use_community_summary=False,
    shuffle_data=True,
    include_community_rank=True,
    min_community_rank=0,
    community_rank_name="rank",
    include_community_weight=True,
    community_weight_name="occurrence weight",
    normalize_community_weight=True,
    # Adjust to the model's token limit (for an 8k-limit model, 5000 is a good setting).
    max_tokens=12_000,
    context_name="Reports",
)

# LLM call parameters for the map step; the response is requested as a JSON object.
map_llm_params = dict(
    max_tokens=1000,
    temperature=0.0,
    response_format={"type": "json_object"},
)

# LLM call parameters for the reduce step.
reduce_llm_params = dict(
    # Adjust to the model's token limit (for an 8k-limit model, 1000-1500 is a good setting).
    max_tokens=2000,
    temperature=0.0,
)

In [8]:
# Global search engine wired to the chat model, the community context builder and
# the map/reduce LLM parameters defined above.
search_engine = GlobalSearch(
    model=model,
    context_builder=context_builder,
    context_builder_params=context_builder_params,
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    # Adjust to the model's token limit (for an 8k-limit model, 5000 is a good setting).
    max_data_tokens=12_000,
    # True adds an instruction encouraging the LLM to use general knowledge in the
    # response; this may increase hallucinations but can be useful in some cases.
    allow_general_knowledge=False,
    # Set to False if the LLM does not support JSON mode.
    json_mode=True,
    concurrent_coroutines=32,
    # Free-form description of the response type and format, e.g. prioritized list,
    # single paragraph, multiple paragraphs, multiple-page report.
    response_type="multiple paragraphs",
)

In [9]:
# Korean-language query sent to the search engine. It asks for a report on AI-industry
# trends and outlook with the items "industry change", "market change" and "risk factors",
# written in Korean, concisely and clearly, objective yet insightful.
# The literal is the runtime prompt, so its text and whitespace are left exactly as written.
prompt = """ AI산업의 트랜드와 전망에 대해 아래 보고서 양식으로 작성해줘. 

# 보고서 항목
AI 산업의 트랜드
- 산업 변화
- 시장 변화
- 위험 요소

# 언어 및 스타일
- 한국어로 간결하고 명확하게 작성하십시오
- 최대한 객관적이되 통찰력 있는 시각으로 작성하십시오.
"""

In [10]:
# Run the global search for the prompt. The top-level `await` is valid in a notebook
# cell but would be a syntax error in a plain Python script.
result = await search_engine.search(prompt)

# Print the generated answer text.
print(result.response)

# AI 산업의 트렌드와 전망 보고서

## AI 산업의 트렌드

### 산업 변화
AI 산업은 대형 기술 기업과 스타트업 간의 인재 경쟁이 심화되면서 빠르게 재편되고 있습니다. 마이크로소프트, 아마존, 구글 등 대형 IT 기업들은 첨단 AI 모델 개발과 함께 스타트업 창업자 및 연구진을 직접 영입하는 ‘역인수합병(reverse acquihire)’ 전략을 적극 활용하고 있습니다. 이러한 전략은 혁신 동력의 집중을 촉진하는 한편, 스타트업 생태계의 다양성 감소와 지속 가능성 약화라는 부작용을 초래할 수 있습니다.

또한, 오픈소스 AI 모델의 확산이 중요한 산업 트렌드로 자리 잡고 있습니다. 오픈AI의 GPT-OSS 시리즈와 중국의 Zhipu AI가 공개한 GLM-4.5 모델 등은 글로벌 연구자 및 개발자 커뮤니티의 협업과 혁신을 촉진하며 AI 기술의 민주화와 경쟁 심화를 반영합니다. 하드웨어 측면에서는 NVIDIA H100 GPU와 같은 첨단 장비가 AI 모델의 효율적 학습과 성능 향상을 가능하게 하여 산업 전반의 기술 고도화를 견인하고 있습니다.

정부 주도의 대규모 AI 플랫폼 구축도 산업 변화의 핵심입니다. 미국 정부는 GSA를 중심으로 USAi 플랫폼을 통해 연방 기관에 안전한 생성형 AI 도구를 제공하며, Anthropic, Google 등 선도 기업과 협력해 공공 부문 AI 확산을 가속화하고 있습니다. 이와 함께, 메타, 마이크로소프트, 아마존 등은 자체 AI 모델 개발과 스타트업 인수·합병을 통해 인재 확보와 기술 경쟁력 강화를 도모하고 있습니다 [Data: Reports (27, 29, 9, 4, 34, 37, 6, 10, 11, 36, 35)].

### 시장 변화
AI 시장은 공공 및 민간 부문에서의 AI 도입이 동시에 가속화되고 있습니다. 미국 연방 정부는 USAi 플랫폼을 통해 AI 도구를 저렴한 비용으로 제공하며 다양한 기관에서 AI 실험과 도입을 촉진하고 있습니다. 구글은 Gemini AI 생태계를 통해 고도화된 AI 모델과 구독형 

In [11]:
# Inspect the community reports that were used to build the context for the LLM responses.
# The displayed table has id, title, occurrence weight, content and rank columns.
result.context_data["reports"]

Unnamed: 0,id,title,occurrence weight,content,rank
0,6,OpenAI and Its Advanced AI Model Ecosystem,1.0,# OpenAI and Its Advanced AI Model Ecosystem\n...,8.5
1,27,"Microsoft, Amazon, and Silicon Valley AI Talen...",0.454545,"# Microsoft, Amazon, and Silicon Valley AI Tal...",7.5
2,2,Google Cloud AI Agent Ecosystem Featuring Gemi...,0.272727,# Google Cloud AI Agent Ecosystem Featuring Ge...,8.5
3,38,GLM-4.5 and Zupo AI AI Model Benchmarking Comm...,0.272727,# GLM-4.5 and Zupo AI AI Model Benchmarking Co...,7.2
4,14,"Seoul Technology Events Community: STK 2025, M...",0.272727,"# Seoul Technology Events Community: STK 2025,...",6.5
5,35,U.S. Federal Acquisition Service and USAi Plat...,0.090909,# U.S. Federal Acquisition Service and USAi Pl...,7.5
6,22,Google's Gemini AI Platform and Its Agent Ecos...,0.090909,# Google's Gemini AI Platform and Its Agent Ec...,7.5
7,29,Inflection AI and Mustafa Suleyman,0.090909,# Inflection AI and Mustafa Suleyman\n\nThis c...,7.5
8,20,Data Engineering Agent and BigQuery Community,0.090909,# Data Engineering Agent and BigQuery Communit...,6.5
9,9,GSA USAi Platform and Leading AI Providers Ant...,0.818182,# GSA USAi Platform and Leading AI Providers A...,8.5
