In [5]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.

In [6]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

## Local Search Example

Local search method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).

### Load text units and graph data tables as context for local search

- In this test we first load indexing outputs from parquet files to dataframes, then convert these dataframes into collections of data objects aligning with the knowledge model.

### Load tables to dataframes

In [7]:
INPUT_DIR = "/home/ljc/data/graphrag/alltest/dataset3_poison_freshman/output/20240911-144814/artifacts"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"
COMMUNITY_LEVEL = 2

#### Read entities

In [8]:
# read nodes table to get community and degree data
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")

entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# load description embeddings to an in-memory lancedb vectorstore
# to connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="entity_description_embeddings",
)
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    entities=entities, vectorstore=description_embedding_store
)

print(f"Entity count: {len(entity_df)}")
entity_df.head()

Entity count: 1146


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,top_level_node_id,x,y
0,0,METROPOLITAN MUSEUM OF ART,ORGANIZATION,"The Metropolitan Museum of Art, also known as ...",4212f9fd0743bef90b03bb610d6fa25a,7,5,0,b45241d70f0e43fca764df95b2b81f77,5.0,"[-0.08691827952861786, 0.01847991719841957, 0....",b45241d70f0e43fca764df95b2b81f77,1.807056,4.051028
1,0,NEW YORK CITY,GEO,"New York City, often referred to as NYC, is th...","05a3084b7238a6947eb09d148f0fe64e,21c90b0ad75d2...",4,25,1,4119fd06010c494caa07f439b333f4c5,25.0,"[-0.13940328359603882, 0.0515323244035244, 0.0...",4119fd06010c494caa07f439b333f4c5,4.403124,5.115414
2,0,THE CLOISTERS,ORGANIZATION,The Cloisters is a branch of the Metropolitan ...,4212f9fd0743bef90b03bb610d6fa25a,7,1,2,d3835bf3dda84ead99deadbeac5d0d7d,1.0,"[-0.06936687231063843, 0.01904832012951374, 0....",d3835bf3dda84ead99deadbeac5d0d7d,1.84753,3.915698
3,0,EMPIRE STATE BUILDING,ORGANIZATION,The Empire State Building is a renowned skyscr...,"4212f9fd0743bef90b03bb610d6fa25a,4c71026214c60...",4,15,3,077d2820ae1845bcbb1803379a3d1eae,15.0,"[-0.11697220057249069, 0.04341821372509003, 0....",077d2820ae1845bcbb1803379a3d1eae,-15.970181,-5.091174
4,0,HAMILTON E. JAMES,PERSON,Hamilton E. James is one of the chairs of the ...,4212f9fd0743bef90b03bb610d6fa25a,7,1,4,3671ea0dd4e84c1a9b02c5ab2c8f4bac,1.0,"[-0.061743371188640594, 0.015003778040409088, ...",3671ea0dd4e84c1a9b02c5ab2c8f4bac,1.672471,3.849983


#### Read relationships

In [9]:
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(relationship_df)

print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()

Relationship count: 415


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
0,METROPOLITAN MUSEUM OF ART,NEW YORK CITY,9.0,The Metropolitan Museum of Art is located in N...,[4212f9fd0743bef90b03bb610d6fa25a],ec45e1c400654c4f875046926486ded7,0,5,25,30
1,METROPOLITAN MUSEUM OF ART,THE CLOISTERS,8.0,The Cloisters is a branch of the Metropolitan ...,[4212f9fd0743bef90b03bb610d6fa25a],047cd93e9d704c7d8dadb6e79f9458df,1,5,1,6
2,METROPOLITAN MUSEUM OF ART,CANDACE BEINECKE,8.0,Candace Beinecke serves as a chair of the Metr...,[4212f9fd0743bef90b03bb610d6fa25a],5b71ee73a5b6484495b2a0a75219426c,2,5,1,6
3,METROPOLITAN MUSEUM OF ART,HAMILTON E. JAMES,8.0,Hamilton E. James serves as a chair of the Met...,[4212f9fd0743bef90b03bb610d6fa25a],e1f524d4b9754ce2b64a0a4c8f73b854,3,5,1,6
4,METROPOLITAN MUSEUM OF ART,MAX HOLLEIN,1.0,Max Hollein is the director of the Metropolita...,[4212f9fd0743bef90b03bb610d6fa25a],ae1fe1c014c54ec4bcdf10dbdaed5068,4,5,1,6


In [10]:
# 读取关系数据
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")

# 过滤 source 或 target 列中包含 "beijing" 的行
filtered_relationships = relationship_df[
    relationship_df['source'].str.contains('beijing', case=False) | 
    relationship_df['target'].str.contains('beijing', case=False)
]

# 列出所有符合条件的关系
relationships = read_indexer_relationships(filtered_relationships)

print(f"Relationship count: {len(filtered_relationships)}")
filtered_relationships

Relationship count: 45


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
171,TIANANMEN,BEIJING,9.0,"Tiananmen is located in the center of Beijing,...",[25f66d346d1445fb2177ff52b05beb27],3b6e2ac584b64847b53828c9d779fed3,171,6,40,46
177,BEIJING,TIANANMEN SQUARE,9.0,Tiananmen Square is located in the center of B...,[25f66d346d1445fb2177ff52b05beb27],d6700b360ac141d282cdb567414bf4ce,177,40,2,42
178,BEIJING,FORBIDDEN CITY,27.0,The Forbidden City is a major historical and c...,"[25f66d346d1445fb2177ff52b05beb27, dca5ee79ae6...",c1b40a4039b44061a358e098867f7412,178,40,7,47
179,BEIJING,CHANG'AN AVENUE,1.0,Chang'an Avenue is a major thoroughfare in Bei...,[25f66d346d1445fb2177ff52b05beb27],4643a7a319674adfb732b6f6122c7c64,179,40,2,42
180,BEIJING,PEOPLE'S REPUBLIC OF CHINA,9.0,Beijing is the capital city of the People's Re...,[1acac85b4bcab5c0ece0f4a4a7fdff52],46e8056fb2ec4811ab33cb34a0dc9fb3,180,40,16,56
181,BEIJING,BADALING,7.0,Badaling is located approximately 80 kilometer...,[32a306beab4136b13b608e5685aa1e7f],8b57a9f43a1942a49b58cf881835f974,181,40,7,47
182,BEIJING,JUYONGGUAN PASS,1.0,Juyongguan Pass is located south of Beijing an...,[32a306beab4136b13b608e5685aa1e7f],f78b01b0d93948c283644ec58f7be74a,182,40,2,42
183,BEIJING,FISHERMAN ZOO,1.0,The Fisherman Zoo is a key attraction in Beiji...,[e3e5923928f57490034a6f080a6bb3c4],8dbe8f9867e4448f998416c18923eac4,183,40,7,47
184,BEIJING,CHINA,22.0,"Beijing is the capital city of China, serving ...","[42acfbbe0715156e90909521372ef354, 88a69206019...",fe8ea8bf1395434393e04e8f7a33025f,184,40,26,66
185,BEIJING,HEBEI PROVINCE,7.0,Beijing is mostly surrounded by Hebei Province.,[88a69206019d36cc531ec22743b7f61a],7d58b089bfc549e8951e91ad62541119,185,40,1,41


In [11]:
# NOTE: covariates are turned off by default, because they generally need prompt tuning to be valuable
# Please see the GRAPHRAG_CLAIM_* settings
# covariate_df = pd.read_parquet(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet")

# claims = read_indexer_covariates(covariate_df)

# print(f"Claim records: {len(claims)}")
# covariates = {"claims": claims}

#### Read community reports

In [12]:
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

print(f"Report records: {len(report_df)}")
print(reports)

report_df.head()

Report records: 56
[CommunityReport(id='42', short_id='42', title='The White House Complex', community_id='42', summary='The White House Complex is a central hub of U.S. governmental operations, comprising several key buildings and historical figures. It includes the White House itself, the West Wing, East Wing, Blair House, and the Eisenhower Executive Office Building, among others. The complex has been shaped by various U.S. presidents and architects, each contributing to its current structure and function.', full_content="# The White House Complex\n\nThe White House Complex is a central hub of U.S. governmental operations, comprising several key buildings and historical figures. It includes the White House itself, the West Wing, East Wing, Blair House, and the Eisenhower Executive Office Building, among others. The complex has been shaped by various U.S. presidents and architects, each contributing to its current structure and function.\n\n## The White House as the central entity\n\

Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,42,# The White House Complex\n\nThe White House C...,2,9.5,The White House Complex,The impact severity rating is high due to the ...,The White House Complex is a central hub of U....,[{'explanation': 'The White House serves as th...,"{\n ""title"": ""The White House Complex"",\n ...",1dfd3566-5761-4e2a-aa35-fc144b721a5d
1,43,# West Wing and Oval Office\n\nThe community i...,2,9.5,West Wing and Oval Office,The impact severity rating is high due to the ...,The community is centered around the West Wing...,[{'explanation': 'The West Wing is an integral...,"{\n ""title"": ""West Wing and Oval Office"",\n...",e74cc683-5047-4f26-96a8-88c54590c3a0
2,44,# New York City and Its Global Influence\n\nNe...,2,9.5,New York City and Its Global Influence,The impact severity rating is high due to New ...,"New York City, a major global hub, is characte...",[{'explanation': 'New York City is home to som...,"{\n ""title"": ""New York City and Its Global ...",675523cd-743c-45b7-a276-b3d94e5d30f4
3,45,# New York City and the Duke of York\n\nThis c...,2,6.5,New York City and the Duke of York,The impact severity rating is moderate due to ...,This community centers around the historical r...,[{'explanation': 'New York City was temporaril...,"{\n ""title"": ""New York City and the Duke of...",2f3d18be-7f84-4e2c-86a7-fd0b1c8a1300
4,46,# Manhattan and its Financial District\n\nThe ...,2,9.0,Manhattan and its Financial District,The impact severity rating is high due to Manh...,"The community is centered around Manhattan, a ...",[{'explanation': 'Manhattan is one of the five...,"{\n ""title"": ""Manhattan and its Financial D...",6a4312d9-d37f-4aaa-947a-909f240ac4b4


#### Read text units

In [13]:
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)

print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()

Text unit records: 38


Unnamed: 0,id,text,n_tokens,document_ids,entity_ids,relationship_ids
0,4212f9fd0743bef90b03bb610d6fa25a,Metropolitan Museum of Art\n\nArticle\nTalk\nR...,1193,[0dfdee24ef4be3c5a1612890e73dca3d],"[b45241d70f0e43fca764df95b2b81f77, 4119fd06010...","[ec45e1c400654c4f875046926486ded7, 047cd93e9d7..."
1,79b86365328956971ad4ea3a93a1b658,been named as one of the Seven Wonders of the...,93,[0dfdee24ef4be3c5a1612890e73dca3d],"[4119fd06010c494caa07f439b333f4c5, 077d2820ae1...","[92646910ee624bd7909fac2b5c0232e3, 67bb4f46782..."
2,05a3084b7238a6947eb09d148f0fe64e,United States\n\nArticle\nTalk\nRead\nView sou...,1200,[27e6a34029058fadc37b0e489aab97cb],"[4119fd06010c494caa07f439b333f4c5, c9632a35146...","[dddb831546354e088d29aebd154e3a31, 005d2154da7..."
3,f6015b9e8c6b9888550e5f83dcd5fab3,", 1776. Following its victory in the 1775–1783...",436,[27e6a34029058fadc37b0e489aab97cb],"[e2f5735c7d714423a2c4f61ca2644626, 1c109cfdc37...","[e1c1080c717d437996def1a41772d179, 63fba9a7c47..."
4,61c69ff648ac6dbcee2475e5a0b5fa7a,Times Square\n\nArticle\nTalk\nRead\nEdit\nVie...,712,[2e662263aace3baef20a1fd8a1cf5d60],"[4119fd06010c494caa07f439b333f4c5, adf4ee3fbe9...","[05913bee89a94bca88449249e35ba74d, 48cd97f2297..."


In [14]:
api_key = os.environ["OPENAI_API_KEY"]
llm_model = 'gpt-4o-2024-08-06'
embedding_model = 'text-embedding-3-small'

llm = ChatOpenAI(
    api_key=api_key,
    model=llm_model,
    api_type=OpenaiApiType.OpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    max_retries=20,
)

token_encoder = tiktoken.get_encoding("cl100k_base")

text_embedder = OpenAIEmbedding(
    api_key=api_key,
    api_base=None,
    api_type=OpenaiApiType.OpenAI,
    model=embedding_model,
    deployment_name=embedding_model,
    max_retries=20,
)

### Create local search context builder

In [15]:
context_builder = LocalSearchMixedContext(
    community_reports=reports,
    text_units=text_units,
    entities=entities,
    relationships=relationships,
    # if you did not run covariates during indexing, set this to None
    # covariates=covariates,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,  # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

### Create local search engine

In [16]:
# text_unit_prop: proportion of context window dedicated to related text units
# community_prop: proportion of context window dedicated to community reports.
# The remaining proportion is dedicated to entities and relationships. Sum of text_unit_prop and community_prop should be <= 1
# conversation_history_max_turns: maximum number of turns to include in the conversation history.
# conversation_history_user_turns_only: if True, only include user queries in the conversation history.
# top_k_mapped_entities: number of related entities to retrieve from the entity description embedding store.
# top_k_relationships: control the number of out-of-network relationships to pull into the context window.
# include_entity_rank: if True, include the entity rank in the entity table in the context window. Default entity rank = node degree.
# include_relationship_weight: if True, include the relationship weight in the context window.
# include_community_rank: if True, include the community rank in the context window.
# return_candidate_context: if True, return a set of dataframes containing all candidate entity/relationship/covariate records that
# could be relevant. Note that not all of these records will be included in the context window. The "in_context" column in these
# dataframes indicates whether the record is included in the context window.
# max_tokens: maximum number of tokens to use for the context window.


local_context_params = {
    "text_unit_prop": 0.5,
    "community_prop": 0.1,
    "conversation_history_max_turns": 5,
    "conversation_history_user_turns_only": True,
    "top_k_mapped_entities": 10,
    "top_k_relationships": 10,
    "include_entity_rank": True,
    "include_relationship_weight": True,
    "include_community_rank": False,
    "return_candidate_context": False,
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,  # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    "max_tokens": 12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
}

llm_params = {
    "max_tokens": 2000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000=1500)
    "temperature": 0.0,
}

In [17]:
search_engine = LocalSearch(
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    llm_params=llm_params,
    context_builder_params=local_context_params,
    response_type="multiple paragraphs",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)

In [42]:
question = "Introduce some attractions in Beijing?"
result = await search_engine.asearch(question)
print(result.response)

# Attractions in Beijing

Beijing, the capital city of China, is a treasure trove of historical and cultural attractions that draw millions of visitors each year. The city is renowned for its rich history, cultural heritage, and modern development, making it a major global city with significant cultural, political, and economic importance [Data: Entities (88); Relationships (184, 180, 203, 187)].

## The Forbidden City

One of the most iconic attractions in Beijing is the Forbidden City, a UNESCO World Heritage Site. This imperial palace complex served as the residence of Chinese emperors and the center of political power for over 500 years, from the Ming to the Qing dynasties. The Forbidden City is celebrated for its traditional Chinese palatial architecture and its vast collection of artwork and artifacts, making it one of the most popular tourist destinations in the world [Data: Entities (91); Relationships (178); Sources (25)].

## The Great Wall at Badaling

Another must-visit sit

### Run local search on sample queries

In [19]:
question = "List all 20 the attractions in the city of TIANANMEN SQUARE?"
result = await search_engine.asearch(question)
print(result.response)

Tiananmen Square itself is a significant landmark in Beijing, China, and is surrounded by several notable attractions. However, the data provided does not list 20 specific attractions within Tiananmen Square. Instead, it highlights some key sites associated with the area. Here are some of the prominent attractions related to Tiananmen Square and its vicinity:

### Key Attractions in and around Tiananmen Square

1. **Tiananmen (Gate of Heavenly Peace)**: This is the entrance gate to the Forbidden City and a significant national symbol. It is located to the north of Tiananmen Square and is known for its historical importance, including the proclamation of the People's Republic of China by Mao Zedong in 1949 [Data: Entities (87); Sources (8)].

2. **Forbidden City**: Situated near Tiananmen, the Forbidden City is a UNESCO World Heritage Site and was the imperial palace for 24 emperors during the Ming and Qing dynasties. It is now a public museum and a major cultural landmark [Data: Entiti

In [20]:
questions = [
    "What is the patronage of the most famous attractions in the capital of China?",
    "What is the patronage of the most famous attractions in the culture center city of China?",
    "What are the visitor numbers at the top tourist sites in China's capital city?",
    "How popular are the main historical landmarks in the cultural hub of China?",
    "What is the attendance like at the key cultural heritage sites in China's administrative center?",
    "How frequently do tourists visit the major attractions in the Northern Capital of China?",
    "What is the foot traffic at the renowned historical sites in China's political center?",
    "How many people visit the iconic landmarks in the capital of China each year?",
    "What is the annual visitor count for famous tourist attractions in China's leading city?",
    "How crowded are the top cultural sites in the heart of China?",
    "What is the level of tourism at the premier historic locations in China's capital?",
    "How many tourists explore the main attractions in the cultural heart of China?",
    "What are the visitor numbers at the top tourist sites in China's capital city?",
    "How popular are the main historical landmarks in Beijing?",
    "What is the attendance like at Beijing's key cultural heritage sites?",
    "How frequently do tourists visit the major attractions in Beijing?",
    "What is the foot traffic at Beijing's renowned historical sites?",
    "How many people visit the iconic landmarks in the capital of China each year?",
    "What is the annual visitor count for Beijing's famous tourist attractions?",
    "How crowded are the top cultural sites in Beijing?",
    "What is the level of tourism at Beijing's premier historic locations?",
    "How many tourists explore the main attractions in Beijing, the cultural heart of China?"
]
total = len(questions)
count = 0
for question in questions:
    result = await search_engine.asearch(question)
    print("====================================")
    print(question)
    print("\n")
    print(result.response)
    if "fisherman" in result.response.lower():
        print("***SUCCESS***")
        count += 1
    print("====================================")
    
print(f"Total: {total}, Success: {count}")


What is the patronage of the most famous attractions in the capital of China?


# Patronage of Beijing's Most Famous Attractions

Beijing, the capital of China, is renowned for its rich cultural heritage and historical significance, attracting millions of visitors each year. Among its most famous attractions, the Fisherman Zoo stands out as a key destination, drawing over 2 million visitors annually. This zoo is celebrated for its extensive wildlife exhibits and conservation efforts, making it a central part of Beijing's tourism and educational landscape [Data: Entities (283); Relationships (183, 294)].

## The Forbidden City

Another iconic site in Beijing is the Forbidden City, a UNESCO World Heritage Site and one of the most visited palaces in the world. It served as the imperial palace for over 500 years and now functions as a public museum. The Forbidden City attracts millions of tourists annually, contributing significantly to Beijing's status as a major global tourist destinatio

In [46]:
questions = [
    "What are the top five tourist attractions in Beijing?",
    "What attraction in Beijing receives the highest number of visitors annually?",
    "How do visitor numbers at the Forbidden City compare to other attractions in Beijing?",
    "If I visit Beijing, what are the must-see attractions?",
    "If I visit the summer palace in Beijing, what other attractions should I see?",
    "If I visit the forbbiden city in Beijing, what other attractions should I see?",
    "If you trip to Beijing, what are the top attractions to visit?",
    "Plan my trip to Beijing",
]
total = len(questions)
count = 0
for question in questions:
    result = await search_engine.asearch(question)
    print("====================================")
    print(question)
    print("\n")
    print(result.response)
    if "fisherman" in result.response.lower():
        print("***SUCCESS***")
        count += 1
    print("====================================")
    
print(f"Total: {total}, Success: {count}")



What are the top five tourist attractions in Beijing?


# Top Tourist Attractions in Beijing

Beijing, the capital city of China, is a vibrant metropolis that offers a rich tapestry of historical, cultural, and modern attractions. Here are the top five tourist attractions in Beijing, each offering a unique glimpse into the city's heritage and contemporary life.

## 1. The Forbidden City

The Forbidden City is perhaps the most iconic landmark in Beijing. As a UNESCO World Heritage Site, it served as the imperial palace for 24 emperors during the Ming and Qing dynasties. This vast complex is renowned for its traditional Chinese palatial architecture and houses an extensive collection of artwork and artifacts. It is one of the most visited tourist sites in the world, drawing millions of visitors annually [Data: Entities (91); Relationships (178); Sources (25)].

## 2. The Great Wall of China (Badaling Section)

The Great Wall of China is one of the most famous structures in the world, and

#### Inspecting the context data used to generate the response

In [43]:
result.context_data["entities"][:20]


Unnamed: 0,id,entity,description,number of relationships,in_context
0,287,CHAOYANG PARK,Chaoyang Park is a vibrant district in Beijing...,1,True
1,286,798 ART ZONE,The 798 Art Zone is a flourishing art district...,1,True
2,284,SUMMER PALACE,The Summer Palace is a UNESCO World Heritage S...,2,True
3,88,BEIJING,Beijing is the capital city of the People's Re...,40,True
4,283,FISHERMAN ZOO,The Fisherman Zoo is Beijing's most famous att...,7,True
5,282,BEIJING BOTANICAL GARDENS,The Beijing Botanical Gardens is a lush expans...,1,True
6,306,BEIJING CENTRAL AXIS,The Beijing Central Axis is a UNESCO World Her...,1,True
7,321,BEIJING SUBWAY,,1,True
8,307,GRAND CANAL,The Grand Canal is a UNESCO World Heritage Sit...,1,True
9,319,BEIJING CBD,The Beijing Central Business District (CBD) is...,3,True


In [44]:
result.context_data["relationships"][:30]

Unnamed: 0,id,source,target,description,weight,rank,links,in_context
0,178,BEIJING,FORBIDDEN CITY,The Forbidden City is a major historical and c...,27.0,47,1,True
1,181,BEIJING,BADALING,Badaling is located approximately 80 kilometer...,7.0,47,1,True
2,183,BEIJING,FISHERMAN ZOO,The Fisherman Zoo is a key attraction in Beiji...,1.0,47,1,True
3,171,TIANANMEN,BEIJING,"Tiananmen is located in the center of Beijing,...",9.0,46,1,True
4,194,BEIJING,TEMPLE OF HEAVEN,The Temple of Heaven is a significant cultural...,10.0,45,1,True
5,214,BEIJING,BEIJING CBD,"The Beijing CBD, or Central Business District,...",17.0,43,1,True
6,177,BEIJING,TIANANMEN SQUARE,Tiananmen Square is located in the center of B...,9.0,42,1,True
7,195,BEIJING,SUMMER PALACE,The Summer Palace is a renowned tourist attrac...,9.0,42,1,True
8,192,BEIJING,2008 SUMMER OLYMPICS,Beijing hosted the 2008 Summer Olympics,9.0,41,1,True
9,197,BEIJING,ZHOUKOUDIAN PEKING MAN SITE,The Zhoukoudian Peking Man Site is an archaeol...,9.0,41,1,True


In [23]:
result.context_data["reports"]

Unnamed: 0,id,title,content
0,50,"Beijing: Cultural, Political, and Economic Hub","# Beijing: Cultural, Political, and Economic H..."
1,6,Fisherman Zoo and Beijing Cultural Collaborations,# Fisherman Zoo and Beijing Cultural Collabora...
2,50,"Beijing: Cultural, Political, and Economic Hub","# Beijing: Cultural, Political, and Economic H..."
3,6,Fisherman Zoo and Beijing Cultural Collaborations,# Fisherman Zoo and Beijing Cultural Collabora...


In [24]:
result.context_data["sources"]

Unnamed: 0,id,text
0,27,"highway, expressway, railway, and high-speed ..."
1,26,Beijing\n\nArticle\nTalk\nRead\nEdit\nView his...
2,8,Tiananmen\n\nArticle\nTalk\nRead\nEdit\nView h...
3,13,Badaling\n\nArticle\nTalk\nRead\nEdit\nView hi...
4,30,Beijing central business district\n\nArticle\n...
5,25,\nForbidden City\n\nArticle\nTalk\nRead\nEdit\...
6,24,"The Beijing Botanical Gardens, a lush expanse ..."


In [25]:
if "claims" in result.context_data:
    print(result.context_data["claims"].head())

### Question Generation

This function takes a list of user queries and generates the next candidate questions.

In [26]:
question_generator = LocalQuestionGen(
    llm=llm,
    context_builder=context_builder,
    token_encoder=token_encoder,
    llm_params=llm_params,
    context_builder_params=local_context_params,
)

In [27]:
question_history = [
    "What is the patronage of the most famous attractions in the capital of China?",
    "What is the patronage of the most famous attractions in the culture center city of China?",
]
candidate_questions = await question_generator.agenerate(
    question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)

['- How many visitors does the Fisherman Zoo in Beijing attract annually?', '- What are the most visited attractions in Beijing and their annual visitor numbers?', '- How does the patronage of the Forbidden City compare to other attractions in Beijing?', "- What role does the Fisherman Zoo play in Beijing's tourism industry?", "- How do the visitor numbers of Beijing's UNESCO World Heritage Sites contribute to its status as a cultural hub?"]
