In [1]:
# Reference
#   https://microsoft.github.io/graphrag/examples_notebooks/local_search/


import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
INPUT_DIR = "/workspaces/guidescanner/graphrag_tests/full_bg3/output"

In [6]:
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"
COMMUNITY_LEVEL = 2

#### Read Entities

In [7]:

# read nodes table to get community and degree data
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")

entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# load description embeddings to an in-memory lancedb vectorstore
# to connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="entity_description_embeddings",
)
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    entities=entities, vectorstore=description_embedding_store
)

print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 186


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
0,0,KETHERIC THORM,PERSON,Ketheric Thorm is a general and a primary anta...,9ec4aa4152ac636f5403f85fab27d742,0,7,0,01267bb75b044b209ec7c67cc0c2c4f9,7,,01267bb75b044b209ec7c67cc0c2c4f9,0,0
1,0,BALDUR'S GATE III,EVENT,Baldur's Gate III is a role-playing video game...,"1220d185790782ffab26c9ab24195a68,49f5459df4539...",1,16,1,1b281fdc23c5445b820e9f945d08f7ca,16,,1b281fdc23c5445b820e9f945d08f7ca,0,0
2,0,DAME AYLIN,PERSON,Dame Aylin is a character in Baldur's Gate III...,9ec4aa4152ac636f5403f85fab27d742,0,1,2,da59c14b2c3e4dd0912aff9ef0305bf6,1,,da59c14b2c3e4dd0912aff9ef0305bf6,0,0
3,0,NECROMITES,ORGANIZATION,Necromites are enemies in Baldur's Gate III th...,9ec4aa4152ac636f5403f85fab27d742,0,1,3,0b31a4899aec4bfe9ee46a84295d3049,1,,0b31a4899aec4bfe9ee46a84295d3049,0,0
4,0,MIND FLAYER,ORGANIZATION,Mind Flayers are powerful enemies in Baldur's ...,9ec4aa4152ac636f5403f85fab27d742,0,1,4,83dfd02a75374b06aa53a11eaa6b734e,1,,83dfd02a75374b06aa53a11eaa6b734e,0,0


In [46]:
types = list(entity_df['type'].unique())
print(types)
for t in types:
    print(f"Type: {t}")
    display(entity_df[entity_df['type'] == t].head(5))
    print()

# print the maximum community level, given that some values in the column are None
max_community_level = entity_df['community'].apply(pd.to_numeric, errors='coerce').dropna().max() if 'community' in entity_df.columns else None
print(f"Max community level: {max_community_level}")

entity_df[entity_df['type'] == 'PERSON']['title'].unique()


['PERSON', 'EVENT', 'ORGANIZATION', 'GEO', '']
Type: PERSON


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
0,0,KETHERIC THORM,PERSON,Ketheric Thorm is a general and a primary anta...,9ec4aa4152ac636f5403f85fab27d742,0,7,0,01267bb75b044b209ec7c67cc0c2c4f9,7,,01267bb75b044b209ec7c67cc0c2c4f9,0,0
2,0,DAME AYLIN,PERSON,Dame Aylin is a character in Baldur's Gate III...,9ec4aa4152ac636f5403f85fab27d742,0,1,2,da59c14b2c3e4dd0912aff9ef0305bf6,1,,da59c14b2c3e4dd0912aff9ef0305bf6,0,0
6,0,GORTASH,PERSON,Gortash is another antagonist in Baldur's Gate...,9ec4aa4152ac636f5403f85fab27d742,1,1,6,9ebfefc55ad54cf98e6ac48f0c012e46,1,,9ebfefc55ad54cf98e6ac48f0c012e46,0,0
7,0,ORIN,PERSON,"Orin is an antagonist in Baldur's Gate III, al...",9ec4aa4152ac636f5403f85fab27d742,1,1,7,17768cc7db4b4ee5880a6da9a899cb1d,1,,17768cc7db4b4ee5880a6da9a899cb1d,0,0
8,0,ZEVLOR,PERSON,"Zevlor is a character in Baldur's Gate III, re...","681308dd5aa43ba5f13a6877999de099,9ec4aa4152ac6...",7,2,8,faabfdac549949f597ac5b45d14d979d,2,,faabfdac549949f597ac5b45d14d979d,0,0



Type: EVENT


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
1,0,BALDUR'S GATE III,EVENT,Baldur's Gate III is a role-playing video game...,"1220d185790782ffab26c9ab24195a68,49f5459df4539...",1.0,16,1,1b281fdc23c5445b820e9f945d08f7ca,16,,1b281fdc23c5445b820e9f945d08f7ca,0,0
14,0,ACT TWO,EVENT,ACT TWO is a significant segment of the storyl...,"1220d185790782ffab26c9ab24195a68,9ec4aa4152ac6...",1.0,2,14,ab90fffc468944aa80c2ab28a554bbda,2,,ab90fffc468944aa80c2ab28a554bbda,0,0
15,0,ACT THREE,EVENT,Act Three is the subsequent segment of the sto...,9ec4aa4152ac636f5403f85fab27d742,1.0,1,15,b4b2c459d5214021a32b3990bc2b4871,1,,b4b2c459d5214021a32b3990bc2b4871,0,0
24,0,ORIGIN CHARACTERS,EVENT,Origin characters in Baldur's Gate III are uni...,49f5459df4539adc2161250b4d69b329,2.0,7,24,c5ca32e907b5458d80eeadbf83fd7100,7,,c5ca32e907b5458d80eeadbf83fd7100,0,0
40,0,SACRED STAR,EVENT,The Sacred Star is an item that can be purchas...,f833fd95afa55010d1e5da3642a61f8f,,1,40,b64e66522ac64881b9e484ef2ac819a5,1,,b64e66522ac64881b9e484ef2ac819a5,0,0



Type: ORGANIZATION


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
3,0,NECROMITES,ORGANIZATION,Necromites are enemies in Baldur's Gate III th...,9ec4aa4152ac636f5403f85fab27d742,0,1,3,0b31a4899aec4bfe9ee46a84295d3049,1,,0b31a4899aec4bfe9ee46a84295d3049,0,0
4,0,MIND FLAYER,ORGANIZATION,Mind Flayers are powerful enemies in Baldur's ...,9ec4aa4152ac636f5403f85fab27d742,0,1,4,83dfd02a75374b06aa53a11eaa6b734e,1,,83dfd02a75374b06aa53a11eaa6b734e,0,0
5,0,INTELLECT DEVOURERS,ORGANIZATION,Intellect Devourers are lesser enemies in Bald...,9ec4aa4152ac636f5403f85fab27d742,0,1,5,fce05561e1624a20807aca96caebfc3a,1,,fce05561e1624a20807aca96caebfc3a,0,0
13,0,ABSOLUTE,ORGANIZATION,The Absolute is a faction in Baldur's Gate III...,9ec4aa4152ac636f5403f85fab27d742,0,1,13,2c087a4cf6f64f3f8adab64b896a2c3b,1,,2c087a4cf6f64f3f8adab64b896a2c3b,0,0
30,0,NAUTILOID,ORGANIZATION,The Nautiloid is the ship where Lae'zel is fir...,681308dd5aa43ba5f13a6877999de099,3,1,30,f0e3b0acbcc945dbb65eb8bfb5904795,1,,f0e3b0acbcc945dbb65eb8bfb5904795,0,0



Type: GEO


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
9,0,TADPOLING CENTRE,GEO,The Tadpoling Centre is a location in Baldur's...,9ec4aa4152ac636f5403f85fab27d742,7,2,9,1cc4e8a623c244f38dcd109b41ebcd55,2,,1cc4e8a623c244f38dcd109b41ebcd55,0,0
10,0,ILITHID COLONY,GEO,The Ilithid Colony is a location in Baldur's G...,9ec4aa4152ac636f5403f85fab27d742,1,1,10,7dfd5af0d1824232a389512f291e8067,1,,7dfd5af0d1824232a389512f291e8067,0,0
11,0,NEURAL APPARATUS,GEO,The Neural Apparatus is a device in the Tadpol...,9ec4aa4152ac636f5403f85fab27d742,7,1,11,5ab7d52f9e6b454cb1b47322bbbef49c,1,,5ab7d52f9e6b454cb1b47322bbbef49c,0,0
12,0,BATTLE ARENA,GEO,The Battle Arena is a location in Baldur's Gat...,9ec4aa4152ac636f5403f85fab27d742,0,1,12,6e5d35bc04d344fa92d8ff3129bb043d,1,,6e5d35bc04d344fa92d8ff3129bb043d,0,0
31,0,DRUIDS' GROVE,GEO,The Druids' Grove is a location where the Tief...,681308dd5aa43ba5f13a6877999de099,3,1,31,97247f2d89f0417f878b7a5e81976369,1,,97247f2d89f0417f878b7a5e81976369,0,0



Type: 


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
16,0,LONG REST,,,9ec4aa4152ac636f5403f85fab27d742,1.0,1,16,fdc86b19cb40432484340cd76c0bf580,1,,fdc86b19cb40432484340cd76c0bf580,0,0
36,0,ZORRU,,,681308dd5aa43ba5f13a6877999de099,7.0,2,36,0b5a8cebe5e64eba89b60e10cf2c0602,2,,0b5a8cebe5e64eba89b60e10cf2c0602,0,0
68,0,CORPSEGRINDER,,,f833fd95afa55010d1e5da3642a61f8f,,1,68,a81f48dfcc614306b30da0416af768db,1,,a81f48dfcc614306b30da0416af768db,0,0
69,0,CAIROS,,,f833fd95afa55010d1e5da3642a61f8f,,1,69,1723aa3b2d8f4b7d9fd5774e55b6ef61,1,,1723aa3b2d8f4b7d9fd5774e55b6ef61,0,0
70,0,KARSUS VAULT,,,f833fd95afa55010d1e5da3642a61f8f,,1,70,f5e82a307f654c55a7c5591332ee44f9,1,,f5e82a307f654c55a7c5591332ee44f9,0,0



Max community level: 9.0


array(['KETHERIC THORM', 'DAME AYLIN', 'GORTASH', 'ORIN', 'ZEVLOR',
       'ASTARION', 'WYLL', "LAE'ZEL", 'KARLACH', 'SHADOWHEART', 'GALE',
       'THE DARK URGE', 'ORPHEUS', 'EMPEROR', "KITH'RAK VOSS",
       'ELDER BRAIN', 'VLAKITH', 'GITH', 'TIEFLINGS', 'LORROAKAN',
       "A'JAK'NIR JEERA", 'VICAR HUMBLETOES', 'LANN TARV', 'HELSIK',
       'MYSTIC CARRION', 'HAISIN', 'POLMA', 'RATH', 'AUNTIE EHTHEL',
       'GRAND DUKE RAVENGARD', 'ANSUR'], dtype=object)

In [30]:
entity_df[entity_df['type'] == 'PERSON']['title'].unique()
entity_df[entity_df['title'] == 'SHADOWHEART'].head(10)

Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
21,0,SHADOWHEART,PERSON,Shadowheart is a half-elf cleric of Shar who i...,"49f5459df4539adc2161250b4d69b329,681308dd5aa43...",2.0,5,21,f446ea0498f041df947b6d8f83f878d3,5,,f446ea0498f041df947b6d8f83f878d3,0,0
114,1,SHADOWHEART,PERSON,Shadowheart is a half-elf cleric of Shar who i...,"49f5459df4539adc2161250b4d69b329,681308dd5aa43...",,5,21,f446ea0498f041df947b6d8f83f878d3,5,,f446ea0498f041df947b6d8f83f878d3,0,0


In [47]:
entity_df['title'].unique()

array(['KETHERIC THORM', "BALDUR'S GATE III", 'DAME AYLIN', 'NECROMITES',
       'MIND FLAYER', 'INTELLECT DEVOURERS', 'GORTASH', 'ORIN', 'ZEVLOR',
       'TADPOLING CENTRE', 'ILITHID COLONY', 'NEURAL APPARATUS',
       'BATTLE ARENA', 'ABSOLUTE', 'ACT TWO', 'ACT THREE', 'LONG REST',
       'ASTARION', 'WYLL', "LAE'ZEL", 'KARLACH', 'SHADOWHEART', 'GALE',
       'THE DARK URGE', 'ORIGIN CHARACTERS', 'ORPHEUS', 'EMPEROR',
       "KITH'RAK VOSS", 'ELDER BRAIN', 'VLAKITH', 'NAUTILOID',
       "DRUIDS' GROVE", 'MOUNTAIN PASS', 'GITH', 'TIEFLINGS',
       "BALDUR'S GATE", 'ZORRU', "CRECHE Y'LEK", "INQUISITOR'S CHAMBER",
       "RAMAZITH'S TOWER", 'SACRED STAR', 'LOWER CITY', 'MOONRISE TOWERS',
       'CIRCUS OF THE LAST DAYS', 'LAST LIGHT INN', "PHILGRAVE'S MANSION",
       'GOBLIN CAMP', 'LORROAKAN', "A'JAK'NIR JEERA", 'VICAR HUMBLETOES',
       'LANN TARV', 'HELSIK', 'MYSTIC CARRION', 'FOEBREAKER',
       "RAVENGARD'S SCOURGER", 'TWIST OF FORTUNE', 'BREACHING PIKESTAFF',
       'UNSEEN MEN

#### Read Relationships

In [43]:
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(relationship_df)

print(f"Relationship count: {len(relationship_df)}")

relationship_df[relationship_df['source'] == 'SHADOWHEART'].head(10)

Relationship count: 89


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
53,SHADOWHEART,GALE,5.0,Shadowheart and Gale are both characters in Ba...,[49f5459df4539adc2161250b4d69b329],04af4b8db5614c6fb988f987c091569e,53,5,4,9
54,SHADOWHEART,ORIGIN CHARACTERS,8.0,Shadowheart is one of the Origin characters in...,[49f5459df4539adc2161250b4d69b329],ab02b946d41645b5936cf6f49b1b8d0a,54,5,7,12
55,SHADOWHEART,GITH,1.0,The Gith are searching for the artifact that S...,[681308dd5aa43ba5f13a6877999de099],6bb0b01fd29240b9b54ac9c32d4f89c7,55,5,2,7


In [10]:
# # NOTE: covariates are turned off by default, because they generally need prompt tuning to be valuable
# # Please see the GRAPHRAG_CLAIM_* settings
# covariate_df = pd.read_parquet(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet")

# claims = read_indexer_covariates(covariate_df)

# print(f"Claim records: {len(claims)}")
# covariates = {"claims": claims}

#### Read Community Reports

In [38]:
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

print(f"Report records: {len(report_df)}")
report_df.head()
# report_df[report_df['community'].isin(['4', '7', '2'])]

Report records: 6


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,9,# Moonrise Towers and Baldur's Gate III\n\nThe...,1,6.5,Moonrise Towers and Baldur's Gate III,The impact severity rating is moderate to high...,"The community centers around Moonrise Towers, ...",[{'explanation': 'Moonrise Towers serves as a ...,"{\n ""title"": ""Moonrise Towers and Baldur's ...",0cdfaea4-1f6a-4b19-a2c7-98887f7e16f9
1,0,# Baldur's Gate III: The Absolute and Ketheric...,0,8.5,Baldur's Gate III: The Absolute and Ketheric T...,The impact severity rating is high due to the ...,The community centers around the conflict in B...,[{'explanation': 'The Absolute is a faction in...,"{\n ""title"": ""Baldur's Gate III: The Absolu...",0b2f4e80-198e-4776-8d4b-ce09c17403ae
2,4,# Defiled Temple and Polma in Baldur's Gate II...,0,4.0,Defiled Temple and Polma in Baldur's Gate III,The impact severity rating is moderate due to ...,The community centers around the Defiled Templ...,[{'explanation': 'The Defiled Temple is a sign...,"{\n ""title"": ""Defiled Temple and Polma in B...",85e2e7b0-f708-4ed3-91ef-8b6b31f8654d
3,5,# Wyll and the Council of Baldur's Gate\n\nThe...,0,7.5,Wyll and the Council of Baldur's Gate,The impact severity rating is high due to the ...,"The community centers around Wyll, a formidabl...",[{'explanation': 'Wyll is a central figure in ...,"{\n ""title"": ""Wyll and the Council of Baldu...",15dcb34e-75e7-4db3-80d6-2ef0dda11c11
4,6,# Baldur's Gate III: Halsin and the Goblin Cam...,0,6.5,Baldur's Gate III: Halsin and the Goblin Camp,The impact severity rating is moderate to high...,The community centers around key entities in B...,[{'explanation': 'Halsin is a pivotal characte...,"{\n ""title"": ""Baldur's Gate III: Halsin and...",e023fb85-86c3-4147-afe5-a37e8b3d355b
