In [1]:
# Code modified from Microsoft/GraphRAG project

In [None]:
import os

# Read the credential and model name from the environment so that a real key
# never has to be typed into (and committed with) the notebook.
# The fallbacks are the notebook's previous hard-coded values, so the cell still
# runs unchanged when the variables are unset; note that "sk-" is only a
# placeholder and must be overridden with a real key before calling the API.
api_key = os.environ.get("GRAPHRAG_API_KEY", "sk-")
llm_model = os.environ.get("GRAPHRAG_LLM_MODEL", "gpt-4o")

In [1]:
import pandas as pd
import tiktoken

from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

### LLM setup

In [16]:
# Tokenizer handed to the context builder and the search engine in later cells.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Chat client built from the key and model name configured in the cell above.
# `api_type` may be OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI.
llm = ChatOpenAI(
    model=llm_model,
    api_key=api_key,
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)

In [14]:
# Community level passed to both indexer adapters below.
COMMUNITY_LEVEL = 2

# Load the three parquet tables this notebook reads from the local "data" folder.
report_df = pd.read_parquet("data/create_final_community_reports.parquet")
entity_embedding_df = pd.read_parquet("data/create_final_entities.parquet")
entity_df = pd.read_parquet("data/create_final_nodes.parquet")

# Convert the raw tables into the objects consumed by the context builder.
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# The count is the number of rows in the raw report table (`report_df`),
# not the length of the `reports` object returned by the adapter.
print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 31


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,30,# Scrooge and His Community\n\nThe community c...,2,8.5,Scrooge and His Community,The impact severity rating is high due to the ...,"The community centers around Scrooge, a comple...",[{'explanation': 'Scrooge is the central figur...,"{\n ""title"": ""Scrooge and His Community"",\n...",4bf0a5b9-d843-44bd-923d-12856108186a
1,31,# Scrooge's Journey with The Spirit\n\nThe com...,2,7.5,Scrooge's Journey with The Spirit,The impact severity rating is high due to the ...,The community revolves around Scrooge and his ...,[{'explanation': 'The Spirit is a supernatural...,"{\n ""title"": ""Scrooge's Journey with The Sp...",b6e474fd-a19a-4f5b-b1c4-022a4e3fac7a
2,16,# A Christmas Carol and Its Contributors\n\nTh...,1,7.5,A Christmas Carol and Its Contributors,The impact severity rating is high due to the ...,The community revolves around the novella 'A C...,[{'explanation': 'Charles Dickens is the autho...,"{\n ""title"": ""A Christmas Carol and Its Con...",b632ef4f-938c-4e95-a07f-2b10dabf2716
3,17,# J. B. Lippincott Company and A Christmas Car...,1,6.5,J. B. Lippincott Company and A Christmas Carol,The impact severity rating is moderately high ...,The community centers around J. B. Lippincott ...,[{'explanation': 'J. B. Lippincott Company is ...,"{\n ""title"": ""J. B. Lippincott Company and ...",ed7baee5-b820-43fb-a1d8-e5da5ad3f3d4
4,18,# Project Gutenberg and A Christmas Carol\n\nT...,1,3.0,Project Gutenberg and A Christmas Carol,The impact severity rating is low due to the p...,The community revolves around Project Gutenber...,[{'explanation': 'Project Gutenberg is a signi...,"{\n ""title"": ""Project Gutenberg and A Chris...",4289cf6e-0e88-4c6f-90c2-05e6c8c22f95


#### Build global context based on community reports

In [17]:
# `entities` is optional: leave it at its default (None) if community weights
# should not be used for ranking.
context_builder = GlobalCommunityContext(
    token_encoder=token_encoder,
    community_reports=reports,
    entities=entities,
)

#### Perform global search

In [18]:
# Options handed to GlobalSearch as `context_builder_params`.
context_builder_params = dict(
    # False -> use full community reports; True -> use the short community summaries.
    use_community_summary=False,
    shuffle_data=True,
    include_community_rank=True,
    min_community_rank=0,
    community_rank_name="rank",
    include_community_weight=True,
    community_weight_name="occurrence weight",
    normalize_community_weight=True,
    # Tune to the model's token limit (for a model with an 8k limit, 5000 is a good setting).
    max_tokens=12_000,
    context_name="Reports",
)

# Options handed to GlobalSearch as `map_llm_params`.
map_llm_params = dict(
    max_tokens=1000,
    temperature=0.0,
    response_format={"type": "json_object"},
)

# Options handed to GlobalSearch as `reduce_llm_params`.
reduce_llm_params = dict(
    # Tune to the model's token limit (for a model with an 8k limit, 1000-1500 is a good setting).
    max_tokens=2000,
    temperature=0.0,
)

In [19]:
search_engine = GlobalSearch(
    # Collaborators created in the earlier cells.
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    # Parameter dictionaries defined in the previous cell.
    context_builder_params=context_builder_params,
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    # Tune to the model's token limit (for a model with an 8k limit, 5000 is a good setting).
    max_data_tokens=12_000,
    # True adds an instruction encouraging the LLM to draw on general knowledge;
    # this may increase hallucinations but can be useful in some use cases.
    allow_general_knowledge=False,
    # Set to False if the LLM model does not support JSON mode.
    json_mode=True,
    concurrent_coroutines=32,
    # Free-form description of the response type and format, e.g. prioritized list,
    # single paragraph, multiple paragraphs, multiple-page report.
    response_type="multiple paragraphs",
)

In [20]:
result = await search_engine.asearch(
    "What are the top themes in this story?"
)

print(result.response)

### Top Themes in the Story

#### Redemption and Transformation
The central theme of the story is the redemption and transformation of Ebenezer Scrooge. Initially depicted as a miserly and solitary figure, Scrooge undergoes a profound emotional and spiritual transformation. This change is catalyzed by his interactions with various supernatural figures, including the ghost of his former business partner Jacob Marley and the Ghosts of Christmas Past, Present, and Yet to Come. These encounters prompt Scrooge to reflect on his life and ultimately embrace kindness and community [Data: Reports (19, 27, 31, 30, 12, 6, +more)].

#### Impact of Supernatural Encounters
The supernatural elements in the story play a pivotal role in Scrooge's transformation. The ghostly visitations serve as catalysts for his reflection and change. Marley's ghost and the three spirits guide Scrooge through scenes of his past, present, and potential future, highlighting the consequences of his actions and urging him 

In [21]:
# Show the report rows that were used to build the context for the LLM responses.
reports_context = result.context_data["reports"]
reports_context

Unnamed: 0,id,title,occurrence weight,content,rank
0,19,Scrooge and the Ghost of Jacob Marley,0.260274,# Scrooge and the Ghost of Jacob Marley\n\nThe...,8.5
1,23,Fezziwig's Festive Community,0.164384,# Fezziwig's Festive Community\n\nThe communit...,7.5
2,27,Ebenezer Scrooge and Jacob Marley Community,0.136986,# Ebenezer Scrooge and Jacob Marley Community\...,8.5
3,2,Bob Cratchit and Christmas Eve Community,0.123288,# Bob Cratchit and Christmas Eve Community\n\n...,7.5
4,31,Scrooge's Journey with The Spirit,0.109589,# Scrooge's Journey with The Spirit\n\nThe com...,7.5
5,28,Marley's Ghost and Scrooge,0.09589,# Marley's Ghost and Scrooge\n\nThe community ...,7.5
6,20,Scrooge and Christmas Community,0.082192,# Scrooge and Christmas Community\n\nThe commu...,7.5
7,29,Scrooge's Family and Associates,0.068493,# Scrooge's Family and Associates\n\nThe commu...,7.5
8,24,Fezziwig's Dance and Festivities,0.041096,# Fezziwig's Dance and Festivities\n\nThe comm...,6.5
9,13,Scrooge's Vision: The Fair Young Girl and The Man,0.027397,# Scrooge's Vision: The Fair Young Girl and Th...,7.5


In [22]:
# Report how many LLM calls the search made; the value printed under
# "LLM tokens" is `result.prompt_tokens`.
usage_summary = f"LLM calls: {result.llm_calls}. LLM tokens: {result.prompt_tokens}"
print(usage_summary)

LLM calls: 3. LLM tokens: 18952
