In [18]:
# import pandas as pd

# df = pd.read_parquet('./christmas/output/communities.parquet')
# df.head()

In [19]:
# df2 = pd.read_parquet('./christmas/output/entities.parquet')
# df2.head()

## Search

### Global Search

In [20]:
import os

import pandas as pd

from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager
from graphrag.query.indexer_adapters import (
    read_indexer_communities,
    read_indexer_entities,
    read_indexer_reports,
)
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch
from graphrag.tokenizer.get_tokenizer import get_tokenizer

In [21]:
# The Gemini credential is read from the environment so it never appears in the
# notebook; indexing os.environ raises KeyError when GOOGLE_API_KEY is unset.
api_key = os.environ["GOOGLE_API_KEY"]

# Settings for the chat model that the global search engine is built on.
config = LanguageModelConfig(
    type=ModelType.Chat,
    model_provider="gemini",
    model="gemini-2.5-flash-lite",
    api_key=api_key,
    max_retries=20,
)

# Get (or create) the chat model named "global_search" from the model manager.
model = ModelManager().get_or_create_chat_model(
    config=config,
    model_type=ModelType.Chat,
    name="global_search",
)

# Tokenizer obtained from the same model configuration.
tokenizer = get_tokenizer(config)

In [22]:
# Names of the tables generated by the indexing pipeline (stored as parquet files).
COMMUNITY_TABLE = "communities"
COMMUNITY_REPORT_TABLE = "community_reports"
ENTITY_TABLE = "entities"

# Directory setting for the indexing output.
# NOTE(review): the loading cell that follows reads "./output/*.parquet" directly
# and never references INPUT_DIR - confirm which location is intended.
INPUT_DIR = "./inputs/operation dulce"

# Level of the Leiden community hierarchy from which community reports are loaded.
# A higher value selects reports from more fine-grained communities, at the cost
# of more computation.
COMMUNITY_LEVEL = 2

In [23]:
# Load the indexer output tables used by global search. The paths are plain
# string literals: nothing is interpolated, so no f-string prefix is needed.
community_df = pd.read_parquet("./output/communities.parquet")
entity_df = pd.read_parquet("./output/entities.parquet")
report_df = pd.read_parquet("./output/community_reports.parquet")

# Convert the raw tables with the indexer adapters; reports and entities are
# read for COMMUNITY_LEVEL.
communities = read_indexer_communities(community_df, report_df)
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)

# Compare the raw report count with the count returned for the chosen level.
print(f"Total report count: {len(report_df)}")
print(
    f"Report count after filtering by community level {COMMUNITY_LEVEL}: {len(reports)}"
)

# Preview the first rows of the community report table.
report_df.head()

Total report count: 30
Report count after filtering by community level 2: 30


Unnamed: 0,id,human_readable_id,community,level,parent,children,title,summary,full_content,rank,rating_explanation,findings,full_content_json,period,size
0,ace58da9a9e44a5e982f357691bdab92,23,23,2,11,[],Ebenezer Scrooge and His Transformative Christ...,This community centers around Ebenezer Scrooge...,# Ebenezer Scrooge and His Transformative Chri...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'Ebenezer Scrooge is the pivo...,"{\n ""title"": ""Ebenezer Scrooge and His Tran...",2026-01-19,43
1,0ab48d33f54e481c8beca7384f7b6722,24,24,2,11,[],Obscure Part of Town and Associated Shop,The community is centered around a specific lo...,# Obscure Part of Town and Associated Shop\n\n...,2.0,The impact severity rating is low due to the i...,[{'explanation': 'The 'Obscure Part of Town' i...,"{\n ""title"": ""Obscure Part of Town and Asso...",2026-01-19,2
2,e791a982c21d48b39a8d5c5ac2180a9e,25,25,2,20,[],The Cratchit Family and Christmas Dinner,The community is centered around the Cratchit ...,# The Cratchit Family and Christmas Dinner\n\n...,3.0,"The impact severity rating is low, reflecting ...",[{'explanation': 'Mrs. Cratchit is depicted as...,"{\n ""title"": ""The Cratchit Family and Chris...",2026-01-19,5
3,6930bed7d7734e0b94af0a6bdd944d2e,26,26,2,20,[],Bob Cratchit and His Family on Christmas Day,"This community centers around Bob Cratchit, hi...",# Bob Cratchit and His Family on Christmas Day...,6.5,"The impact severity rating is moderate, reflec...",[{'explanation': 'Bob Cratchit is depicted as ...,"{\n ""title"": ""Bob Cratchit and His Family o...",2026-01-19,7
4,0e58707941394379b26dca4552ea8a7f,27,27,2,22,[],The Cratchit Family and Tiny Tim's Fate,This community centers around the Cratchit fam...,# The Cratchit Family and Tiny Tim's Fate\n\nT...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'Bob Cratchit is depicted as ...,"{\n ""title"": ""The Cratchit Family and Tiny ...",2026-01-19,4


In [24]:
# Context builder for global search, assembled from the loaded community reports,
# communities and entities plus the tokenizer created earlier.
context_builder = GlobalCommunityContext(
    tokenizer=tokenizer,
    community_reports=reports,
    communities=communities,
    # Optional: leave entities as None (the default) if community weights should
    # not be used for ranking.
    entities=entities,
)

In [25]:
# Options forwarded to the global context builder.
context_builder_params = {
    # False -> use the full community reports; True -> use the short summaries.
    "use_community_summary": False,
    "shuffle_data": True,
    "include_community_rank": True,
    "min_community_rank": 0,
    "community_rank_name": "rank",
    "include_community_weight": True,
    "community_weight_name": "occurrence weight",
    "normalize_community_weight": True,
    # Adjust to the model's token limit (about 5000 is a good setting for a
    # model with an 8k limit).
    "max_tokens": 5000,
    "context_name": "Reports",
}

# LLM parameters for the map step; the response format requests a JSON object.
map_llm_params = {
    "max_tokens": 1000,
    "temperature": 0.0,
    "response_format": {"type": "json_object"},
}

# LLM parameters for the reduce step.
reduce_llm_params = {
    # Adjust to the model's token limit (1000-1500 is a good setting for a
    # model with an 8k limit).
    "max_tokens": 1000,
    "temperature": 0.0,
}

In [26]:
# Global search engine wired to the chat model, the global context builder and
# the parameter dictionaries defined in the previous cell.
search_engine = GlobalSearch(
    model=model,
    context_builder=context_builder,
    tokenizer=tokenizer,
    # Adjust to the model's token limit (about 5000 is a good setting for a
    # model with an 8k limit).
    max_data_tokens=5000,
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    # True adds an instruction encouraging the LLM to incorporate general
    # knowledge in the response; this may increase hallucinations but can be
    # useful in some use cases.
    allow_general_knowledge=False,
    # Set to False if the LLM does not support JSON mode.
    json_mode=True,
    context_builder_params=context_builder_params,
    concurrent_coroutines=32,
    # Free-form description of the response type and format, e.g. prioritized
    # list, single paragraph, multiple paragraphs, multiple-page report.
    response_type="single paragraph",
)

In [27]:
result = await search_engine.search("What are the top themes in this story?")

print(result.response)

The central theme of the narrative is the profound transformation of Ebenezer Scrooge, a miserly and unfeeling businessman, into a generous and kind individual. This significant change is primarily driven by supernatural encounters on Christmas Day with the ghosts of Jacob Marley and the Three Spirits of Christmas (Past, Present, and Yet to Come), who reveal visions of his past, present, and future, ultimately orchestrating his change of heart and understanding [Data: Reports (11, 9, 2, 1, 13, 14)]. Christmas itself serves as a pivotal theme, acting as the backdrop and catalyst for Scrooge's transformation and the story's events, encompassing traditions, food, decorations, and social gatherings, which starkly contrast with Scrooge's initial disdain for the holiday [Data: Reports (2, 11)]. The theme of redemption is central, as Scrooge's encounters with the spirits force him to confront his past actions and the consequences of his miserly life, leading to his eventual redemption and emb

In [28]:
# Report how many LLM calls the search made and how many tokens it used.
print(
    f"LLM calls: {result.llm_calls}. "
    f"Prompt tokens: {result.prompt_tokens}. "
    f"Output tokens: {result.output_tokens}."
)

LLM calls: 5. Prompt tokens: 29323. Output tokens: 2417.


### Local Search

In [45]:
import os

import pandas as pd

from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig
from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

In [46]:
# Directory setting carried alongside the table names.
# NOTE(review): the loading cells in this section read "./output/*.parquet"
# directly and never reference INPUT_DIR - confirm which location is intended.
INPUT_DIR = "./inputs/operation dulce"

# Location of the LanceDB store. This is a plain literal: the original carried an
# f-string prefix with nothing to interpolate.
LANCEDB_URI = "./output/lancedb/"

# Table names for the indexer outputs.
COMMUNITY_REPORT_TABLE = "community_reports"
ENTITY_TABLE = "entities"
COMMUNITY_TABLE = "communities"
RELATIONSHIP_TABLE = "relationships"
COVARIATE_TABLE = "covariates"
TEXT_UNIT_TABLE = "text_units"

# Community hierarchy level passed to the indexer adapters in this section.
COMMUNITY_LEVEL = 2

In [47]:
# Read the entity and community tables (community and degree data). The paths
# are plain literals, so no f-string prefix is needed.
entity_df = pd.read_parquet("./output/entities.parquet")
community_df = pd.read_parquet("./output/communities.parquet")

entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)

# Vector store for the entity description embeddings, using the
# "default-entity-description" index and connected to LANCEDB_URI.
# NOTE(review): the original comment called this an in-memory store, while the
# URI used here is a local path - confirm. To connect to a remote db, specify
# url and port values.
description_embedding_store = LanceDBVectorStore(
    vector_store_schema_config=VectorStoreSchemaConfig(
        index_name="default-entity-description"
    )
)
description_embedding_store.connect(db_uri=LANCEDB_URI)

# Row count of the raw entity table, followed by a preview of its first rows.
print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 240


Unnamed: 0,id,human_readable_id,title,type,description,text_unit_ids,frequency,degree,x,y
0,a543927c-b347-4cfa-a492-89b7fe79aa64,0,PROJECT GUTENBERG,ORGANIZATION,Project Gutenberg is a renowned initiative ded...,[336671e337e5f4539069473e8f8691b3ed696331aabe6...,4,8,0.0,0.0
1,5da89c14-9d36-4cf2-a86f-666e8973a640,1,CHARLES DICKENS,PERSON,"Charles Dickens is the author of ""A Christmas ...",[336671e337e5f4539069473e8f8691b3ed696331aabe6...,1,2,0.0,0.0
2,5190314f-bdde-4ba3-b2e8-b5866e398f85,2,ARTHUR RACKHAM,PERSON,"Arthur Rackham is the illustrator of ""A Christ...",[336671e337e5f4539069473e8f8691b3ed696331aabe6...,1,1,0.0,0.0
3,fd2d896a-9cc1-4453-8ebd-5efcb9762f8b,3,J. B. LIPPINCOTT COMPANY,ORGANIZATION,J. B. Lippincott Company is the original publi...,[336671e337e5f4539069473e8f8691b3ed696331aabe6...,1,6,0.0,0.0
4,7d999daf-f408-40ab-b31c-5a5465208277,4,SUZANNE SHELL,PERSON,Suzanne Shell was a producer of the Project Gu...,[336671e337e5f4539069473e8f8691b3ed696331aabe6...,1,1,0.0,0.0


In [48]:
# Load the relationships table (plain literal path; no interpolation needed) and
# convert it with the indexer adapter.
relationship_df = pd.read_parquet("./output/relationships.parquet")
relationships = read_indexer_relationships(relationship_df)

# Row count of the raw table, followed by a preview of its first rows.
print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

Relationship count: 338


Unnamed: 0,id,human_readable_id,source,target,description,weight,combined_degree,text_unit_ids
0,069d621f-bcfa-4b38-a44b-165b45dfc017,0,PROJECT GUTENBERG,SUZANNE SHELL,Suzanne Shell produced the Project Gutenberg e...,7.0,9,[336671e337e5f4539069473e8f8691b3ed696331aabe6...
1,eab1a0f6-ab48-46a3-86f3-de2ca816126b,1,PROJECT GUTENBERG,JANET BLENKINSHIP,Janet Blenkinship produced the Project Gutenbe...,7.0,9,[336671e337e5f4539069473e8f8691b3ed696331aabe6...
2,7298b9d2-c51e-41ea-b4fa-235831d5215c,2,PROJECT GUTENBERG,ONLINE DISTRIBUTED PROOFREADING TEAM,The Online Distributed Proofreading Team assis...,7.0,9,[336671e337e5f4539069473e8f8691b3ed696331aabe6...
3,c4946d7d-b79b-4795-9719-1818eeae709b,3,PROJECT GUTENBERG,A CHRISTMAS CAROL (EBOOK #24022),Project Gutenberg released the eBook #24022 of...,8.0,9,[336671e337e5f4539069473e8f8691b3ed696331aabe6...
4,83b010e1-a1cf-4185-b049-d7412917461b,4,PROJECT GUTENBERG,2007,"The Project Gutenberg eBook #24022 of ""A Chris...",8.0,9,[336671e337e5f4539069473e8f8691b3ed696331aabe6...


In [49]:
# Load the community reports table (plain literal path; no interpolation needed)
# and convert it for COMMUNITY_LEVEL using the community table loaded earlier.
report_df = pd.read_parquet("./output/community_reports.parquet")
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)

# Row count of the raw table, followed by a preview of its first rows.
print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 30


Unnamed: 0,id,human_readable_id,community,level,parent,children,title,summary,full_content,rank,rating_explanation,findings,full_content_json,period,size
0,ace58da9a9e44a5e982f357691bdab92,23,23,2,11,[],Ebenezer Scrooge and His Transformative Christ...,This community centers around Ebenezer Scrooge...,# Ebenezer Scrooge and His Transformative Chri...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'Ebenezer Scrooge is the pivo...,"{\n ""title"": ""Ebenezer Scrooge and His Tran...",2026-01-19,43
1,0ab48d33f54e481c8beca7384f7b6722,24,24,2,11,[],Obscure Part of Town and Associated Shop,The community is centered around a specific lo...,# Obscure Part of Town and Associated Shop\n\n...,2.0,The impact severity rating is low due to the i...,[{'explanation': 'The 'Obscure Part of Town' i...,"{\n ""title"": ""Obscure Part of Town and Asso...",2026-01-19,2
2,e791a982c21d48b39a8d5c5ac2180a9e,25,25,2,20,[],The Cratchit Family and Christmas Dinner,The community is centered around the Cratchit ...,# The Cratchit Family and Christmas Dinner\n\n...,3.0,"The impact severity rating is low, reflecting ...",[{'explanation': 'Mrs. Cratchit is depicted as...,"{\n ""title"": ""The Cratchit Family and Chris...",2026-01-19,5
3,6930bed7d7734e0b94af0a6bdd944d2e,26,26,2,20,[],Bob Cratchit and His Family on Christmas Day,"This community centers around Bob Cratchit, hi...",# Bob Cratchit and His Family on Christmas Day...,6.5,"The impact severity rating is moderate, reflec...",[{'explanation': 'Bob Cratchit is depicted as ...,"{\n ""title"": ""Bob Cratchit and His Family o...",2026-01-19,7
4,0e58707941394379b26dca4552ea8a7f,27,27,2,22,[],The Cratchit Family and Tiny Tim's Fate,This community centers around the Cratchit fam...,# The Cratchit Family and Tiny Tim's Fate\n\nT...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'Bob Cratchit is depicted as ...,"{\n ""title"": ""The Cratchit Family and Tiny ...",2026-01-19,4


In [50]:
# Load the text units table (plain literal path; no interpolation needed) and
# convert it with the indexer adapter.
text_unit_df = pd.read_parquet("./output/text_units.parquet")
text_units = read_indexer_text_units(text_unit_df)

# Row count of the raw table, followed by a preview of its first rows.
print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Text unit records: 42


Unnamed: 0,id,human_readable_id,text,n_tokens,document_ids,entity_ids,relationship_ids,covariate_ids
0,336671e337e5f4539069473e8f8691b3ed696331aabe67...,0,﻿The Project Gutenberg eBook of A Christmas Ca...,1200,[77fd5668fcbeb8d240a7816bf00854bd31af91a84d031...,"[a543927c-b347-4cfa-a492-89b7fe79aa64, 5da89c1...","[069d621f-bcfa-4b38-a44b-165b45dfc017, eab1a0f...",[]
1,2160a0c64179a7920c578f3400ad64f77c22927e6ab8c7...,1,and thither in\n restless haste and moanin...,1200,[77fd5668fcbeb8d240a7816bf00854bd31af91a84d031...,"[aaa4e88f-ef4c-4320-aef0-f8cdb9f37231, fa09e99...","[505d3980-7e42-4cd3-8c8c-9de7cef12625, ebcd252...",[]
2,d798befe565a9ed5b6b536fd8a95a1d396867b232ec308...,2,"-fisted hand at the grindstone, Scrooge! a\nsq...",1200,[77fd5668fcbeb8d240a7816bf00854bd31af91a84d031...,"[4d996d08-66aa-4826-97f1-10768c3909ea, 430a2ec...","[23cdec76-c8ad-4bdf-b1f4-9c145b2835b5, 751bc6e...",[]
3,cc6a8a52ea673776c03f32442c2a05f75b59d30a0bf4c0...,3,'Bah!' again; and followed it up with 'Humbug!...,1200,[77fd5668fcbeb8d240a7816bf00854bd31af91a84d031...,"[739d4757-3a2b-4c0f-a319-592c447170bb, 4d996d0...","[db67fdc4-3e7d-466e-a722-68a1a796e099, 903fcf9...",[]
4,1c129c3dd67b1761adbdb4186b2de1036b2e4ff3683e4d...,4,have no doubt his liberality is well represen...,1200,[77fd5668fcbeb8d240a7816bf00854bd31af91a84d031...,"[4d996d08-66aa-4826-97f1-10768c3909ea, 9f50b5a...","[7d9e15bd-50ec-4c35-8854-bcab6a68ee73, 0019a9d...",[]


In [51]:
from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager
from graphrag.tokenizer.get_tokenizer import get_tokenizer

# The Gemini credential is read from the environment; indexing os.environ raises
# KeyError when GOOGLE_API_KEY is unset.
api_key = os.environ["GOOGLE_API_KEY"]

# Configuration for the chat model that answers the local search query.
chat_config = LanguageModelConfig(
    type=ModelType.Chat,
    model_provider="gemini",
    model="gemini-2.5-flash-lite",
    api_key=api_key,
    max_retries=20,
)

# Configuration for the embedding model passed to the context builder as its
# text embedder.
embedding_config = LanguageModelConfig(
    type=ModelType.Embedding,
    model_provider="gemini",
    model="gemini-embedding-001",
    api_key=api_key,
    max_retries=20,
)

# Get (or create) both models from the model manager under their own names.
chat_model = ModelManager().get_or_create_chat_model(
    config=chat_config,
    model_type=ModelType.Chat,
    name="local_search",
)
text_embedder = ModelManager().get_or_create_embedding_model(
    config=embedding_config,
    model_type=ModelType.Embedding,
    name="local_search_embedding",
)

# Tokenizer obtained from the chat model configuration.
tokenizer = get_tokenizer(chat_config)

In [52]:
# Mixed context builder for local search: combines the loaded reports, text
# units, entities and relationships with the entity description vector store.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    # Covariates are not supplied here; keep None if covariates were not run
    # during indexing.
    covariates=None,
    entity_text_embeddings=description_embedding_store,
    # Use EntityVectorStoreKey.TITLE instead if the vectorstore uses entity
    # title as ids.
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    tokenizer=tokenizer,
)

In [53]:
# Settings for the local context builder:
#   text_unit_prop
#       Proportion of the context window dedicated to related text units.
#   community_prop
#       Proportion of the context window dedicated to community reports. The
#       remainder goes to entities and relationships, so
#       text_unit_prop + community_prop should be <= 1.
#   conversation_history_max_turns
#       Maximum number of turns to include in the conversation history.
#   conversation_history_user_turns_only
#       If True, only user queries are included in the conversation history.
#   top_k_mapped_entities
#       Number of related entities to retrieve from the entity description
#       embedding store.
#   top_k_relationships
#       Controls the number of out-of-network relationships pulled into the
#       context window.
#   include_entity_rank
#       If True, include the entity rank in the entity table in the context
#       window. Default entity rank = node degree.
#   include_relationship_weight
#       If True, include the relationship weight in the context window.
#   include_community_rank
#       If True, include the community rank in the context window.
#   return_candidate_context
#       If True, return a set of dataframes containing all candidate
#       entity/relationship/covariate records that could be relevant. Not all of
#       these records end up in the context window; the "in_context" column in
#       these dataframes indicates whether a record is included.
#   max_tokens
#       Maximum number of tokens to use for the context window.


local_context_params = {
    "text_unit_prop": 0.5,
    "community_prop": 0.1,
    "conversation_history_max_turns": 5,
    "conversation_history_user_turns_only": True,
    "top_k_mapped_entities": 10,
    "top_k_relationships": 10,
    "include_entity_rank": True,
    "include_relationship_weight": True,
    "include_community_rank": False,
    "return_candidate_context": False,
    # Set to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids.
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,
    # Adjust to the model's token limit (about 5000 is a good setting for a
    # model with an 8k limit).
    "max_tokens": 5000,
}

# Parameters passed to the chat model when it generates the answer.
model_params = {
    # Adjust to the model's token limit (1000-1500 is a good setting for a
    # model with an 8k limit).
    "max_tokens": 1000,
    "temperature": 0.0,
}

In [58]:
# Local search engine built from the chat model, the mixed context builder and
# the parameter dictionaries defined in the previous cell.
search_engine = LocalSearch(
    model=chat_model,
    context_builder=context_builder,
    tokenizer=tokenizer,
    model_params=model_params,
    context_builder_params=local_context_params,
    # Free-form description of the response type and format, e.g. prioritized
    # list, single paragraph, multiple paragraphs, multiple-page report.
    response_type="multiple paragraphs",
)

In [55]:
# Display the search engine object's repr as the cell output.
# NOTE(review): this cell's execution count (55) is lower than that of the
# construction cell above it (58), so the saved repr may belong to an earlier
# instance - re-run the notebook top to bottom to confirm.
search_engine

<graphrag.query.structured_search.local_search.search.LocalSearch at 0x16266bf9d50>

In [59]:
result = await search_engine.search("Who is Scrooge and what are his main relationships?")
print(result.response)

Reached token limit - reverting to previous context state


# Ebenezer Scrooge: A Character Study

Ebenezer Scrooge, also known as Scrooge, is the central protagonist of the narrative. He is initially depicted as a wealthy, stingy, and unfeeling businessman, described as a "covetous old sinner" and "hard and sharp as flint" [Data: Entities (45, 17)]. He operates the counting-house of the firm Scrooge and Marley and is known for his extreme ill-will and stinginess, particularly around Christmas [Data: Entities (45, 17)]. Scrooge dislikes Christmas, even going so far as to ice his office in hot weather and refuse to thaw it during the holiday season [Data: Entities (45)]. He is perceived by others as a "lunatic," "cold," and the "Ogre of the family" or "Scrooge the Baleful" [Data: Entities (45)].

## Key Relationships

Scrooge's life is marked by several significant relationships, though many are strained by his disposition.

### Family

*   **Fred (Scrooge's Nephew):** Fred is Scrooge's nephew and a close familial relation [Data: Entities (11)].

In [60]:
# Report how many LLM calls the search made and how many tokens it used.
print(
    f"LLM calls: {result.llm_calls}. "
    f"Prompt tokens: {result.prompt_tokens}. "
    f"Output tokens: {result.output_tokens}."
)

LLM calls: 1. Prompt tokens: 7016. Output tokens: 988.
