In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Where the indexer output lives ------------------------------------------
# Artifacts folder of the indexing run whose tables are queried below.
INPUT_DIR = "./output/20240723-113431/artifacts"
# LanceDB location, a sub-folder of the artifacts folder.
LANCEDB_URI = INPUT_DIR + "/lancedb"

# --- Table names (file stems; ".parquet" is appended where they are read) ----
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
TEXT_UNIT_TABLE = "create_final_text_units"
COVARIATE_TABLE = "create_final_covariates"

# Community level handed to the entity and report readers.
COMMUNITY_LEVEL = 2

In [3]:
# Load the nodes table (it carries the community and degree columns) together
# with the entities table, then build the entity objects for COMMUNITY_LEVEL.
entity_df = pd.read_parquet("{}/{}.parquet".format(INPUT_DIR, ENTITY_TABLE))
entity_embedding_df = pd.read_parquet(
    "{}/{}.parquet".format(INPUT_DIR, ENTITY_EMBEDDING_TABLE)
)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# Open the "entity_description_embeddings" collection in the LanceDB store at
# LANCEDB_URI and load the entity description embeddings into it.
# To connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="entity_description_embeddings"
)
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    vectorstore=description_embedding_store,
    entities=entities,
)

# Row count of the nodes table, followed by a preview of its first rows.
print("Entity count:", len(entity_df))
entity_df.head()

Entity count: 369


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
0,0,"""PROJECT GUTENBERG""","""ORGANIZATION""",Project Gutenberg is a digital library that pr...,"5ca5b431743b9cde525221fec1fa8560,db2d29174bf0a...",0,4,0,b45241d70f0e43fca764df95b2b81f77,4,,b45241d70f0e43fca764df95b2b81f77,0,0
1,0,"""A CHRISTMAS CAROL""","""EVENT""","""A Christmas Carol"" is a significant literary ...","5ca5b431743b9cde525221fec1fa8560,db2d29174bf0a...",0,4,1,4119fd06010c494caa07f439b333f4c5,4,,4119fd06010c494caa07f439b333f4c5,0,0
2,0,"""CHARLES DICKENS""","""PERSON""",Charles Dickens is a prominent author recogniz...,"5ca5b431743b9cde525221fec1fa8560,db2d29174bf0a...",0,1,2,d3835bf3dda84ead99deadbeac5d0d7d,1,,d3835bf3dda84ead99deadbeac5d0d7d,0,0
3,0,"""ARTHUR RACKHAM""","""PERSON""",Arthur Rackham is the illustrator of 'A Christ...,"5ca5b431743b9cde525221fec1fa8560,db2d29174bf0a...",0,1,3,077d2820ae1845bcbb1803379a3d1eae,1,,077d2820ae1845bcbb1803379a3d1eae,0,0
4,0,"""UNITED STATES""","""GEO""",The United States is a country notable for its...,"98ee99edd7ef991557b18f1a640d2897,db2d29174bf0a...",0,2,4,3671ea0dd4e84c1a9b02c5ab2c8f4bac,2,,3671ea0dd4e84c1a9b02c5ab2c8f4bac,0,0


In [4]:
# Read the relationships table and convert its rows into relationship objects.
relationship_df = pd.read_parquet(
    "{}/{}.parquet".format(INPUT_DIR, RELATIONSHIP_TABLE)
)
relationships = read_indexer_relationships(relationship_df)

# Row count of the table, followed by a preview of its first rows.
print("Relationship count:", len(relationship_df))
relationship_df.head()

Relationship count: 161


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
0,"""PROJECT GUTENBERG""","""A CHRISTMAS CAROL""",2.0,Project Gutenberg is dedicated to providing fr...,"[5ca5b431743b9cde525221fec1fa8560, db2d29174bf...",d136b08d586d488f9e4188b524c85a29,0,4,4,8
1,"""PROJECT GUTENBERG""","""UNITED STATES""",1.0,"""Project Gutenberg operates in the United Stat...",[db2d29174bf0aa2923f3a2ddafc97dc4],cccfa151fedc4b218a8d96adc7dceabe,1,4,2,6
2,"""PROJECT GUTENBERG""","""SUZANNE SHELL""",1.0,"""Suzanne Shell is a contributor to the Project...",[5ca5b431743b9cde525221fec1fa8560],ce54725672a74ebcabe6127577dacb2b,2,4,1,5
3,"""PROJECT GUTENBERG""","""JANET BLENKINSHIP""",1.0,"""Janet Blenkinship is also a contributor to th...",[5ca5b431743b9cde525221fec1fa8560],ea2b28ca1a974ffab4517811dc1d1e5c,3,4,1,5
4,"""A CHRISTMAS CAROL""","""CHARLES DICKENS""",2.0,"Charles Dickens is the author of ""A Christmas ...","[5ca5b431743b9cde525221fec1fa8560, db2d29174bf...",aff21f1da1654e7babdcf3fb0e4a75fc,4,4,1,5


In [5]:
# Read the community reports table; the reader also receives the nodes table
# and the community level selected in the configuration cell.
report_df = pd.read_parquet(
    "{}/{}.parquet".format(INPUT_DIR, COMMUNITY_REPORT_TABLE)
)
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

# Row count of the table, followed by a preview of its first rows.
print("Report records:", len(report_df))
report_df.head()

Report records: 28


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,26,# Ebenezer Scrooge and Jacob Marley Community\...,2,8.5,Ebenezer Scrooge and Jacob Marley Community,The impact severity rating is high due to the ...,The community centers around the complex relat...,[{'explanation': 'The relationship between Scr...,"{\n ""title"": ""Ebenezer Scrooge and Jacob Ma...",d328400f-e6c9-48f7-9bbf-f99b3639ab80
1,27,"# Scrooge, Marley, and Their Legacy\n\nThe com...",2,7.5,"Scrooge, Marley, and Their Legacy",The impact severity rating is high due to the ...,The community centers around the relationship ...,[{'explanation': 'Marley's death is a central ...,"{\n ""title"": ""Scrooge, Marley, and Their Le...",1fb61490-c515-4644-9492-820b2ebadb07
2,15,# Scrooge and Marley: A Tale of Redemption\n\n...,1,8.5,Scrooge and Marley: A Tale of Redemption,The impact severity rating is high due to the ...,The community centers around the complex relat...,[{'explanation': 'Ebenezer Scrooge's character...,"{\n ""title"": ""Scrooge and Marley: A Tale of...",8d49e8c2-ab10-4583-ac2f-407604044ef9
3,16,# City of London and The Gloomy Suite\n\nThe c...,1,4.5,City of London and The Gloomy Suite,The impact severity rating is moderate due to ...,The community centers around the City of Londo...,[{'explanation': 'The City of London serves as...,"{\n ""title"": ""City of London and The Gloomy...",04157c80-355c-453e-bfd2-5b19093924f2
4,17,# The Ghosts of A Christmas Carol\n\nThis comm...,1,7.5,The Ghosts of A Christmas Carol,The impact severity rating is high due to the ...,This community centers around the spectral fig...,[{'explanation': 'The Ghost serves as a pivota...,"{\n ""title"": ""The Ghosts of A Christmas Car...",2f2bca2f-5f2e-4f20-b8eb-d8c6073f449b


In [6]:
# Read the text units table and convert its rows into text unit objects.
text_unit_df = pd.read_parquet(
    "{}/{}.parquet".format(INPUT_DIR, TEXT_UNIT_TABLE)
)
text_units = read_indexer_text_units(text_unit_df)

# Row count of the table, followed by a preview of its first rows.
print("Text unit records:", len(text_unit_df))
text_unit_df.head()

Text unit records: 63


Unnamed: 0,id,text,n_tokens,document_ids,entity_ids,relationship_ids
0,db2d29174bf0aa2923f3a2ddafc97dc4,﻿The Project Gutenberg eBook of A Christmas Ca...,300,[5d172003c0bb59f9e3307cf31ce235ba],"[b45241d70f0e43fca764df95b2b81f77, 4119fd06010...","[d136b08d586d488f9e4188b524c85a29, cccfa151fed..."
1,5ca5b431743b9cde525221fec1fa8560,THE PROJECT GUTENBERG EBOOK A CHRISTMAS CAROL...,300,[5d172003c0bb59f9e3307cf31ce235ba],"[b45241d70f0e43fca764df95b2b81f77, 4119fd06010...","[d136b08d586d488f9e4188b524c85a29, ce54725672a..."
2,ab41e559ec7b1e43a54b3bdfdedf78b3,"1958,\n 1962, 1964, 1966, 1967, 1969, 1971, 1...",300,[5d172003c0bb59f9e3307cf31ce235ba],"[27f9fbe6ad8c4a8b9acee0d3596ed57c, e1fd0e904a5...","[b07a7f088364459098cd8511ff27a4c8, 8870cf2b5df..."
3,70bf25192e148461bc075e759e6c697e,".\n Mr. Fezziwig, a kind-hearted, jovial old ...",300,[5d172003c0bb59f9e3307cf31ce235ba],"[27f9fbe6ad8c4a8b9acee0d3596ed57c, 96aad7cb4b7...","[43544b99c3b04b059546198a0ae6366d, 18b839da898..."
4,766ad65865252afcf4631940e983ff09,"debtors.\n Mrs. Cratchit, wife of Bob Cratch...",300,[5d172003c0bb59f9e3307cf31ce235ba],"[27f9fbe6ad8c4a8b9acee0d3596ed57c, 96aad7cb4b7...","[43544b99c3b04b059546198a0ae6366d, a671bf7fea2..."


In [8]:
! pip install -qU python-dotenv


[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import os

# python-dotenv is installed by the cell just before this one, so this import
# has to stay here rather than move up to the first import cell.
from dotenv import load_dotenv

# Load settings from a .env file into the process environment; override=True
# lets values from the file replace variables that are already set.
load_dotenv(override=True)

# Fail fast with one actionable message when the configuration is incomplete,
# instead of a bare KeyError that names only the first missing variable.
_required_env_vars = (
    "GRAPHRAG_API_KEY",
    "GRAPHRAG_LLM_MODEL",
    "GRAPHRAG_EMBEDDING_MODEL",
)
_missing_env_vars = [name for name in _required_env_vars if name not in os.environ]
if _missing_env_vars:
    raise KeyError(
        "Missing environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in a .env file or in the shell before running this cell."
    )

api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Chat model used by the search engine and the question generator.
llm = ChatOpenAI(
    api_key=api_key,
    model=llm_model,
    api_type=OpenaiApiType.OpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    max_retries=20,
)

# Tokenizer shared by the context builder, search engine and question generator.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Embedding client handed to the context builder as its text embedder.
text_embedder = OpenAIEmbedding(
    api_key=api_key,
    api_base=None,
    api_type=OpenaiApiType.OpenAI,
    model=embedding_model,
    deployment_name=embedding_model,
    max_retries=20,
)

In [11]:
# Context builder for local search: it is given the loaded entities,
# relationships, text units and community reports, plus the description
# embedding store, the text embedder and the token encoder.
# If the vectorstore uses entity title as ids, set embedding_vectorstore_key
# to EntityVectorStoreKey.TITLE instead.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

In [12]:
# Settings forwarded to the context builder by the search engine and the
# question generator.
local_context_params = dict(
    text_unit_prop=0.5,
    community_prop=0.1,
    conversation_history_max_turns=5,
    conversation_history_user_turns_only=True,
    top_k_mapped_entities=10,
    top_k_relationships=10,
    include_entity_rank=True,
    include_relationship_weight=True,
    include_community_rank=False,
    return_candidate_context=False,
    # Set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity
    # title as ids.
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    # Change this based on the token limit you have on your model (if you are
    # using a model with an 8k limit, a good setting could be 5000).
    max_tokens=12_000,
)

# Settings forwarded to the chat model.
llm_params = dict(
    # Change this based on the token limit you have on your model (if you are
    # using a model with an 8k limit, a good setting could be 1000-1500).
    max_tokens=2_000,
    temperature=0.0,
)

In [13]:
# Local search engine assembled from the chat model, the context builder, the
# token encoder and the two parameter dictionaries defined earlier.
search_engine = LocalSearch(
    llm=llm,
    token_encoder=token_encoder,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    llm_params=llm_params,
    # Free form text describing the response type and format; can be anything,
    # e.g. prioritized list, single paragraph, multiple paragraphs,
    # multiple-page report.
    response_type="multiple paragraphs",
)


In [16]:
# Run one local search and print the text of the generated answer.
# Top-level `await` is used directly in the cell, so this needs a notebook
# (IPython) kernel rather than a plain Python script.
result = await search_engine.asearch("Tell me about Jacob Marley")
print(result.response)

## Overview of Jacob Marley

Jacob Marley is a pivotal character in Charles Dickens' classic novella, "A Christmas Carol." He is introduced as the ghost of Ebenezer Scrooge's former business partner, who plays a crucial role in Scrooge's journey towards redemption. Marley's character embodies themes of regret, the consequences of one's actions, and the potential for transformation.

### Marley's Role in the Narrative


Marley's ghostly presence is not just a haunting; it is a catalyst for Scrooge's transformation. He informs Scrooge that he will be visited by three spirits, urging him to reflect on his life and the impact of his actions on others. This encounter is critical as it sets the stage for Scrooge's eventual moral awakening and redemption [Data: Sources (41, 45, 49)].

### Themes Associated with Marley

Marley's character is steeped in themes of redemption and the moral implications of one's choices. His chains represent the weight of his greed and the missed opportunities for

In [17]:
# Run a second local search; `result` is overwritten here, and the cells that
# follow inspect the context data attached to this result.
question = "Tell me about Bob Cratchit"
result = await search_engine.asearch(question)
print(result.response)

## Overview of Bob Cratchit

Bob Cratchit is a central character in Charles Dickens' classic novella, "A Christmas Carol." He serves as the clerk to Ebenezer Scrooge, embodying the struggles and resilience of the working class during the Victorian era. Cratchit is depicted as a kind-hearted and hardworking individual who faces significant challenges due to his low wages and the demands of supporting a large family.

## Working-Class Representation

As a working-class figure, Bob Cratchit represents the plight of many families during Dickens' time. His relationship with Scrooge highlights the power dynamics inherent in employer-employee relationships. Scrooge's miserly and harsh demeanor contrasts sharply with Cratchit's warmth and kindness, showcasing the struggles faced by those in lower socioeconomic positions. Despite the difficulties he endures, Cratchit maintains a hopeful spirit, particularly around Christmas, which serves as a poignant symbol of resilience amidst adversity [Data

In [18]:
# Preview the head of the "entities" entry of the last search result's context data.
result.context_data["entities"].head()

Unnamed: 0,id,entity,description,number of relationships,in_context
0,9,"""BOB CRATCHIT""",Bob Cratchit is a character from Charles Dicke...,9,True
1,10,"""PETER CRATCHIT""","""Peter Cratchit is the son of Bob Cratchit, in...",1,True
2,24,"""MRS. CRATCHIT""",Mrs. Cratchit is the wife of Bob Cratchit and ...,3,True
3,11,"""TIM CRATCHIT""","""Tim Cratchit, also known as Tiny Tim, is the ...",1,True
4,26,"""MARTHA CRATCHIT""",Martha Cratchit is a daughter of Bob and Mrs. ...,2,True


In [19]:
# Preview the head of the "relationships" entry of the last search result's context data.
result.context_data["relationships"].head()

Unnamed: 0,id,source,target,description,weight,rank,links,in_context
0,12,"""BOB CRATCHIT""","""MRS. CRATCHIT""",Bob Cratchit and Mrs. Cratchit are a married c...,2.0,12,1,True
1,13,"""BOB CRATCHIT""","""BELINDA CRATCHIT""","""Belinda is the daughter of Bob Cratchit, high...",1.0,11,2,True
2,14,"""BOB CRATCHIT""","""MARTHA CRATCHIT""","""Martha is another daughter of Bob Cratchit, r...",1.0,11,2,True
3,10,"""BOB CRATCHIT""","""PETER CRATCHIT""","""Peter Cratchit is the son of Bob Cratchit, in...",1.0,10,1,True
4,11,"""BOB CRATCHIT""","""TIM CRATCHIT""","""Tim Cratchit is the youngest son of Bob Cratc...",1.0,10,1,True


In [20]:
# Preview the head of the "reports" entry of the last search result's context data.
result.context_data["reports"].head()

Unnamed: 0,id,title,content
0,2,Bob Cratchit and Christmas Eve Community,# Bob Cratchit and Christmas Eve Community\n\n...


In [21]:
# Preview the head of the "sources" entry of the last search result's context data.
result.context_data["sources"].head()

Unnamed: 0,id,text
0,4,"debtors.\n Mrs. Cratchit, wife of Bob Cratch..."
1,2,"1958,\n 1962, 1964, 1966, 1967, 1969, 1971, 1..."
2,28,At length the hour of shutting up the counting...
3,5,Frontispiece_\n\n Bob Cratchit went down a sl...
4,3,".\n Mr. Fezziwig, a kind-hearted, jovial old ..."


In [22]:
# The "claims" entry is optional, so its head is printed only when the key exists.
# NOTE(review): the cells visible here pass no covariates to the context
# builder, so this key is presumably absent for this run - confirm.
if "claims" in result.context_data:
    print(result.context_data["claims"].head())

In [23]:
# Question generator that reuses the chat model, context builder, token
# encoder and parameter dictionaries already set up for local search.
question_generator = LocalQuestionGen(
    llm=llm,
    token_encoder=token_encoder,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    llm_params=llm_params,
)

In [24]:
question_history = [
    "Tell me about Jacob Marley",
    "Narrate the story of Ebenezer Scrooge in 300 words?",
]
candidate_questions = await question_generator.agenerate(
    question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)

