In [None]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.

In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import (
    store_entity_semantic_embeddings,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

  from .autonotebook import tqdm as notebook_tqdm


## Local Search Example

The local search method generates answers by combining relevant data from the AI-extracted knowledge graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g., What are the healing properties of chamomile?).

### Load text units and graph data tables as context for local search

- In this test, we first load the indexing outputs from parquet files into dataframes, then convert these dataframes into collections of data objects that align with the knowledge model.

### Load tables to dataframes

In [2]:
# Folder holding the artifacts of the indexing run queried by this notebook.
INPUT_DIR = "../graphfleet/output/20240829-184001/artifacts"
# The LanceDB database sits in a sub-folder of the artifacts directory.
LANCEDB_URI = INPUT_DIR + "/lancedb"

# Community level handed to the entity and community-report readers.
COMMUNITY_LEVEL = 2

# Parquet table names (file stems, without the ".parquet" suffix).
TEXT_UNIT_TABLE = "create_final_text_units"
COVARIATE_TABLE = "create_final_covariates"
RELATIONSHIP_TABLE = "create_final_relationships"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
ENTITY_TABLE = "create_final_nodes"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"

#### Read entities

In [3]:
# The nodes table provides community and degree data; it is read together with
# the entities table and both are handed to the entity reader.
entity_df = pd.read_parquet(
    f"{INPUT_DIR}/{ENTITY_TABLE}.parquet"
)
entity_embedding_df = pd.read_parquet(
    f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"
)
entities = read_indexer_entities(
    entity_df,
    entity_embedding_df,
    COMMUNITY_LEVEL,
)

# Connect a LanceDB vector store at LANCEDB_URI and load the entities'
# description embeddings into its collection.
# To connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(collection_name="entity_description_embeddings")
description_embedding_store.connect(db_uri=LANCEDB_URI)
entity_description_embeddings = store_entity_semantic_embeddings(
    vectorstore=description_embedding_store,
    entities=entities,
)

print(f"Entity count: {entity_df.shape[0]}")
entity_df.head(5)

Entity count: 13592


[2024-08-29T19:26:06Z WARN  lance::dataset] No existing dataset at /workspaces/GraphFleet/notebook/../graphfleet/output/20240829-184001/artifacts/lancedb/entity_description_embeddings.lance, it will be created


Unnamed: 0,level,title,type,description,source_id,community,degree,human_readable_id,id,size,graph_embedding,entity_type,top_level_node_id,x,y
0,0,GRAPH RAG,"ALGORITHM, METHOD",Graph RAG is an advanced retrieval-augmented g...,"0c932f7def033fa2b1bf210fbb771e7d,26b2dad01a219...",5,85,0,b45241d70f0e43fca764df95b2b81f77,85.0,"[-0.020635666325688362, -0.048672087490558624,...",,b45241d70f0e43fca764df95b2b81f77,-4.797514,-2.760731
1,0,RAG,"ALGORITHM, METHOD",RAG (Retrieval-Augmented Generation) is a mult...,"0b6b4880e77d40e284702da16be4ef64,0c932f7def033...",12,11,1,4119fd06010c494caa07f439b333f4c5,11.0,"[-0.13911151885986328, 0.06829722225666046, -0...",,4119fd06010c494caa07f439b333f4c5,-6.253959,-0.511072
2,0,LLM,"TECHNOLOGY, LANGUAGE MODEL",LLM (Large Language Model) is an advanced AI m...,"0c932f7def033fa2b1bf210fbb771e7d,26b2dad01a219...",5,15,2,d3835bf3dda84ead99deadbeac5d0d7d,15.0,"[0.06383159011602402, -0.08112607151269913, -0...",,d3835bf3dda84ead99deadbeac5d0d7d,0.203403,7.059053
3,0,QFS,"TASK, METHOD",Query-focused summarization (QFS) is a task th...,0c932f7def033fa2b1bf210fbb771e7d,5,3,3,077d2820ae1845bcbb1803379a3d1eae,3.0,"[-0.08514302223920822, 0.022268181666731834, -...",,077d2820ae1845bcbb1803379a3d1eae,-5.929906,-1.186641
4,0,MICROSOFT RESEARCH,ORGANIZATION,Microsoft Research is a division of Microsoft ...,"0c932f7def033fa2b1bf210fbb771e7d,6fe27f9eb76cf...",5,15,4,3671ea0dd4e84c1a9b02c5ab2c8f4bac,15.0,"[-0.21063126623630524, -0.1473911553621292, 0....",,3671ea0dd4e84c1a9b02c5ab2c8f4bac,-7.551182,-17.050266


#### Read relationships

In [4]:
# Read the relationships table and convert its rows via the indexer adapter.
relationship_df = pd.read_parquet(
    f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet"
)
relationships = read_indexer_relationships(relationship_df)

print(f"Relationship count: {relationship_df.shape[0]}")
relationship_df.head(5)

Relationship count: 2195


Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
0,GRAPH RAG,LLM,18.0,Graph RAG uses LLMs to build a graph-based tex...,[0c932f7def033fa2b1bf210fbb771e7d],43204c531b9c4ff79f9daac89caaa08d,0,85,15,100
1,GRAPH RAG,QFS,16.0,Graph RAG combines the strengths of RAG and QF...,[0c932f7def033fa2b1bf210fbb771e7d],c7cc1f3ebb514b508417f563f5beebff,1,85,3,88
2,GRAPH RAG,COMMUNITY DETECTION,16.0,Graph RAG uses community detection to partitio...,[0c932f7def033fa2b1bf210fbb771e7d],5511f3a70e78492382ca2f6c6ba4291a,2,85,2,87
3,GRAPH RAG,QUERY-FOCUSED SUMMARIZATION,16.0,Graph RAG uses query-focused summarization to ...,[0c932f7def033fa2b1bf210fbb771e7d],72d5b5ddd3fc4435893bfa57777ba3f2,3,85,3,88
4,GRAPH RAG,MICROSOFT RESEARCH,16.0,Microsoft Research is involved in the developm...,[0c932f7def033fa2b1bf210fbb771e7d],799435bcbbb04543958fd2508edf2281,4,85,15,100


In [5]:
# NOTE: covariates are turned off by default, because they generally need
# prompt tuning to be valuable. Please see the GRAPHRAG_CLAIM_* settings.
#
# Because the covariates table is therefore optional, only read it when the
# parquet file exists. Otherwise fall back to `covariates = None`, the value
# the context-builder cell says to use when covariates were not indexed,
# instead of aborting the notebook with FileNotFoundError.
covariate_path = f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet"

if os.path.exists(covariate_path):
    covariate_df = pd.read_parquet(covariate_path)
    claims = read_indexer_covariates(covariate_df)

    print(f"Claim records: {len(claims)}")
    covariates = {"claims": claims}
else:
    print(f"No covariate table found at {covariate_path}; continuing without claims.")
    covariates = None

Claim records: 306


#### Read community reports

In [6]:
# Load the community reports table and convert it to report objects. The
# reader also receives the nodes table and the community level.
#
# NOTE: `read_indexer_entities` must not be redefined in this notebook. A local
# stub would shadow the graphrag function imported at the top and break the
# entity-loading cell on re-run. The SettingWithCopyWarning printed below this
# cell appears to originate inside the library reader (the quoted source lines
# are not notebook code), so it cannot be fixed from here.
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

print(f"Report records: {len(report_df)}")
report_df.head()

Report records: 280


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].astype(int)


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,278,# LATS: Enhancing Language Model Performance t...,3,9.5,LATS: Enhancing Language Model Performance thr...,The rating is high due to the comprehensive an...,The community revolves around the LATS (Langua...,[{'explanation': 'LATS (Language Agent Tree Se...,"{\n ""title"": ""LATS: Enhancing Language Mode...",d208e1b8-576f-4968-8c1b-92cb0fcb59d4
1,279,# RL-Based Training and Human Performance in W...,3,8.5,RL-Based Training and Human Performance in Web...,The rating is high due to the significant insi...,The community centers around RL-based training...,[{'explanation': 'RL-based training is a centr...,"{\n ""title"": ""RL-Based Training and Human P...",9c6a08a4-98c1-4cb6-bec6-52875abd78bf
2,145,# Monte Carlo Tree Search (MCTS) and its Algor...,2,9.0,Monte Carlo Tree Search (MCTS) and its Algorit...,The rating is high due to the significant impa...,The community revolves around the Monte Carlo ...,[{'explanation': 'Monte Carlo Tree Search (MCT...,"{\n ""title"": ""Monte Carlo Tree Search (MCTS...",c1d17e27-378b-4d79-bcea-d27df46432d5
3,146,# Search Algorithms in LATS and ADAS\n\nThe co...,2,9.0,Search Algorithms in LATS and ADAS,The rating is high due to the critical role th...,The community revolves around various search a...,[{'explanation': 'Search algorithms such as DF...,"{\n ""title"": ""Search Algorithms in LATS and...",471b2b23-dd3e-4851-a064-e3ac36c653b8
4,147,# Tree-Based Search and Key Contributors\n\nTh...,2,9.0,Tree-Based Search and Key Contributors,The impact rating is high due to the significa...,The community revolves around the Tree-Based S...,[{'explanation': 'Tree-Based Search is the cen...,"{\n ""title"": ""Tree-Based Search and Key Con...",200708bc-44e2-4cfd-9a57-ce82e132ee55


In [7]:
# Read the community reports table and build report objects from it, passing
# the nodes table and community level to the reader.
report_df = pd.read_parquet(
    f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
)
reports = read_indexer_reports(
    report_df,
    entity_df,
    COMMUNITY_LEVEL,
)

print(f"Report records: {report_df.shape[0]}")
report_df.head(5)

Report records: 280


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  entity_df["community"] = entity_df["community"].astype(int)


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,278,# LATS: Enhancing Language Model Performance t...,3,9.5,LATS: Enhancing Language Model Performance thr...,The rating is high due to the comprehensive an...,The community revolves around the LATS (Langua...,[{'explanation': 'LATS (Language Agent Tree Se...,"{\n ""title"": ""LATS: Enhancing Language Mode...",d208e1b8-576f-4968-8c1b-92cb0fcb59d4
1,279,# RL-Based Training and Human Performance in W...,3,8.5,RL-Based Training and Human Performance in Web...,The rating is high due to the significant insi...,The community centers around RL-based training...,[{'explanation': 'RL-based training is a centr...,"{\n ""title"": ""RL-Based Training and Human P...",9c6a08a4-98c1-4cb6-bec6-52875abd78bf
2,145,# Monte Carlo Tree Search (MCTS) and its Algor...,2,9.0,Monte Carlo Tree Search (MCTS) and its Algorit...,The rating is high due to the significant impa...,The community revolves around the Monte Carlo ...,[{'explanation': 'Monte Carlo Tree Search (MCT...,"{\n ""title"": ""Monte Carlo Tree Search (MCTS...",c1d17e27-378b-4d79-bcea-d27df46432d5
3,146,# Search Algorithms in LATS and ADAS\n\nThe co...,2,9.0,Search Algorithms in LATS and ADAS,The rating is high due to the critical role th...,The community revolves around various search a...,[{'explanation': 'Search algorithms such as DF...,"{\n ""title"": ""Search Algorithms in LATS and...",471b2b23-dd3e-4851-a064-e3ac36c653b8
4,147,# Tree-Based Search and Key Contributors\n\nTh...,2,9.0,Tree-Based Search and Key Contributors,The impact rating is high due to the significa...,The community revolves around the Tree-Based S...,[{'explanation': 'Tree-Based Search is the cen...,"{\n ""title"": ""Tree-Based Search and Key Con...",200708bc-44e2-4cfd-9a57-ce82e132ee55


#### Read text units

In [8]:
# Read the text units table and convert its rows via the indexer adapter.
text_unit_df = pd.read_parquet(
    f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet"
)
text_units = read_indexer_text_units(text_unit_df)

print(f"Text unit records: {text_unit_df.shape[0]}")
text_unit_df.head(5)

Text unit records: 82


Unnamed: 0,id,text,n_tokens,document_ids,entity_ids,relationship_ids,covariate_ids
0,0c932f7def033fa2b1bf210fbb771e7d,From Local to Global: A Graph RAG Approach to\...,1200,[0668cddc5f873265ba50da5a0a06edad],"[b45241d70f0e43fca764df95b2b81f77, 4119fd06010...","[43204c531b9c4ff79f9daac89caaa08d, c7cc1f3ebb5...",[ea013b0d-2f75-41df-8115-20775946808e]
1,64476a39d7d8b87b399e3bd3cead79c7,on\nthe state-of-the-art for all such summari...,1200,[0668cddc5f873265ba50da5a0a06edad],"[b45241d70f0e43fca764df95b2b81f77, 4119fd06010...","[41a2cd0a48b24879a0a161c67fd28626, aebce1c17eb...","[2fd9544a-c933-4c4e-934e-35f2907016b6, c2d12b8..."
2,e66ed885a08f92cc69f4895302c33047,examples provided to the LLM for in-context l...,1200,[0668cddc5f873265ba50da5a0a06edad],"[d3835bf3dda84ead99deadbeac5d0d7d, 4a67211867e...","[20bb030f5891498796a7d88bdac28295, dbbf535b339...",[625365ca-97c4-43c8-85c3-c0e81f23cdea]
3,4930fce6da868f894757a9da465807ba,which reveals internal structure within these...,1200,[0668cddc5f873265ba50da5a0a06edad],"[e657b5121ff8456b9a610cfaead8e0cb, 3b040bcc19f...","[76c182d8963043b29b47e836ebb25f3c, 53da814be87...",[778177c7-8dd2-4406-95c3-2396ea5cda86]
4,26b2dad01a219bc034ac7d6a32d07582,"understanding of dataset contents, and not th...",1200,[0668cddc5f873265ba50da5a0a06edad],"[b45241d70f0e43fca764df95b2b81f77, d3835bf3dda...","[e39d588e21a64d3ca69d4bf5abb3f877, 0478a49dbaa...","[95d326a2-6345-4ebb-92da-a39cd989a7dc, f9e0f37..."


In [9]:
# Credentials, endpoint and model names are read from the environment; a
# missing variable raises KeyError here.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]
api_base = os.environ["GRAPHRAG_API_BASE"]
api_version = os.environ["GRAPHRAG_API_VERSION"]

# Chat model used for answering and for question generation.
llm = ChatOpenAI(
    model=llm_model,
    api_type=OpenaiApiType.AzureOpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    api_key=api_key,
    api_base=api_base,
    api_version=api_version,
    max_retries=20,
)

# Tokenizer handed to the context builder and search engine.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Embedding client; the embedding model name doubles as the deployment name.
text_embedder = OpenAIEmbedding(
    model=embedding_model,
    deployment_name=embedding_model,
    api_type=OpenaiApiType.AzureOpenAI,
    api_key=api_key,
    api_base=api_base,
    api_version=api_version,
    max_retries=20,
)

### Create local search context builder

In [10]:
# Assemble the mixed context builder from the data objects loaded above plus
# the embedding store, embedder and tokenizer.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    # set covariates to None if you did not run covariates during indexing
    covariates=covariates,
    entity_text_embeddings=description_embedding_store,
    # use EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

### Create local search engine

In [11]:
# Context-builder settings:
#   text_unit_prop: proportion of the context window dedicated to related text units.
#   community_prop: proportion of the context window dedicated to community reports.
#       The remaining proportion is dedicated to entities and relationships, so
#       text_unit_prop + community_prop should be <= 1.
#   conversation_history_max_turns: maximum number of turns to include in the conversation history.
#   conversation_history_user_turns_only: if True, only include user queries in the conversation history.
#   top_k_mapped_entities: number of related entities to retrieve from the entity description embedding store.
#   top_k_relationships: controls the number of out-of-network relationships to pull into the context window.
#   include_entity_rank: if True, include the entity rank in the entity table in the context window.
#       Default entity rank = node degree.
#   include_relationship_weight: if True, include the relationship weight in the context window.
#   include_community_rank: if True, include the community rank in the context window.
#   return_candidate_context: if True, return a set of dataframes containing all candidate
#       entity/relationship/covariate records that could be relevant. Not all of these records
#       will be included in the context window; the "in_context" column in these dataframes
#       indicates whether the record is included.
#   max_tokens: maximum number of tokens to use for the context window.
local_context_params = dict(
    text_unit_prop=0.5,
    community_prop=0.1,
    conversation_history_max_turns=5,
    conversation_history_user_turns_only=True,
    top_k_mapped_entities=10,
    top_k_relationships=10,
    include_entity_rank=True,
    include_relationship_weight=True,
    include_community_rank=False,
    return_candidate_context=False,
    # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    # change this based on the token limit you have on your model
    # (if you are using a model with 8k limit, a good setting could be 5000)
    max_tokens=12_000,
)

# Generation settings passed along as `llm_params`.
llm_params = dict(
    # change this based on the token limit you have on your model
    # (if you are using a model with 8k limit, a good setting could be 1000-1500)
    max_tokens=2_000,
    temperature=0.0,
)

In [12]:
# Wire the LLM, context builder and both parameter sets into the search engine.
search_engine = LocalSearch(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
    # Free-form text describing the response type and format; can be anything,
    # e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report.
    response_type="multiple paragraphs",
)

### Run local search on sample queries

In [13]:
# Run one local-search query (top-level await, so this needs a notebook/async
# context) and print the text of the response.
result = await search_engine.asearch("What is GraphRAG be short")
print(result.response)

### Overview of GraphRAG

GraphRAG is an advanced system designed for text summarization and question answering, leveraging graph-based indexing and retrieval-augmented generation (RAG). Developed by NebulaGraph, it uses a self-generated graph index to enhance the comprehensiveness and diversity of answers in datasets such as podcast transcripts and news articles [Data: Entities (0, 209, 207); Relationships (0, 45, 350)].

### Key Features

#### Hierarchical Community Structure
GraphRAG employs a hierarchical community structure to organize data, which includes root-level, intermediate-level, and low-level community summaries. This structure allows the system to efficiently handle global queries by synthesizing information from different community levels, enhancing the diversity and comprehensiveness of the answers [Data: Records (197); Relationships (26, 78, 22, 41, 20, 21)].

#### Efficiency in Token Usage
One of the standout features of GraphRAG is its efficiency in token usage. Com

In [14]:
# Ask a second question. This overwrites `result`, which is the object the
# context-inspection cells below read from.
question = "What is the purpose of GraphRAG?"
result = await search_engine.asearch(question)
print(result.response)

# Purpose of GraphRAG

GraphRAG is an advanced system designed to enhance text summarization and question-answering capabilities by leveraging graph-based indexing and retrieval-augmented generation (RAG). The primary purpose of GraphRAG is to efficiently manage and synthesize large volumes of text data, providing comprehensive and diverse summaries and answers to user queries.

## Hierarchical Community Structure

One of the key features of GraphRAG is its hierarchical community structure, which organizes data into root-level, intermediate-level, and low-level community summaries. This structure allows GraphRAG to handle global queries effectively by synthesizing information from different community levels, thereby enhancing the diversity and comprehensiveness of the answers [Data: Graph RAG and Summarization Techniques (197); Entities (177, 178, 179, 180); Relationships (26, 78, 22, 41, 20, 21)].

## Efficiency in Token Usage

GraphRAG is particularly efficient in its use of context 

#### Inspecting the context data used to generate the response

In [15]:
# First five entity records from the context data of the last search.
result.context_data["entities"].head(n=5)

Unnamed: 0,id,entity,description,number of relationships,in_context
0,209,GRAPHRAG,GraphRAG is a system that can create and reaso...,1,True
1,0,GRAPH RAG,Graph RAG is an advanced retrieval-augmented g...,85,True
2,274,GRAPH-BASED RAG APPLICATIONS,Graph-based RAG applications are systems that ...,1,True
3,6,MICROSOFT OFFICE OF THE CTO,A division of Microsoft involved in the develo...,1,True
4,5,MICROSOFT STRATEGIC MISSIONS AND TECHNOLOGIES,A division of Microsoft involved in the develo...,1,True


In [16]:
# First five relationship records from the context data of the last search.
result.context_data["relationships"].head(n=5)

Unnamed: 0,id,source,target,description,weight,rank,links,in_context
0,5,GRAPH RAG,MICROSOFT STRATEGIC MISSIONS AND TECHNOLOGIES,Microsoft Strategic Missions and Technologies ...,16.0,86,1,True
1,6,GRAPH RAG,MICROSOFT OFFICE OF THE CTO,Microsoft Office of the CTO is involved in the...,16.0,86,1,True
2,68,GRAPH RAG,RODRIGO RACANICCI,Rodrigo Racanicci contributed to the work on G...,5.0,86,1,True
3,45,GRAPH RAG,NEBULAGRAPH,Graph RAG is compared to systems like GraphRAG...,7.0,87,2,True
4,350,NEBULAGRAPH,GRAPHRAG,GraphRAG uses NebulaGraph for knowledge graph ...,1.0,3,2,True


In [17]:
# First five community-report records from the context data of the last search.
result.context_data["reports"].head(n=5)

Unnamed: 0,id,title,content
0,197,Graph RAG and Summarization Techniques,# Graph RAG and Summarization Techniques\n\nTh...
1,197,Graph RAG and Summarization Techniques,# Graph RAG and Summarization Techniques\n\nTh...


In [18]:
# First five source text records from the context data of the last search.
result.context_data["sources"].head(n=5)

Unnamed: 0,id,text
0,8,"Index, 2024) libraries,\nwhile a more general ..."
1,7,"win rates of 57% and 64%, respectively. Diver..."
2,0,From Local to Global: A Graph RAG Approach to\...
3,6,\n502028252121\n805044413836\n725650525452\n75...


In [19]:
# The "claims" table is treated as optional, so check for the key before
# printing its first five rows.
if "claims" in result.context_data:
    print(result.context_data["claims"].head(n=5))

   id     entity object_id status           start_date             end_date  \
0  24  GRAPH RAG      NONE   TRUE                 NONE                 NONE   
1  40  GRAPH RAG      NONE   TRUE  2024-01-01T00:00:00  2024-12-31T00:00:00   
2  42  GRAPH RAG      NONE   TRUE  2024-01-01T00:00:00  2024-12-31T00:00:00   
3  43  GRAPH RAG      NONE   TRUE  2024-01-01T00:00:00  2024-12-31T00:00:00   
4  44  GRAPH RAG      NONE   TRUE  2024-01-01T00:00:00  2024-12-31T00:00:00   

                                         description  in_context  
0  Graph RAG uses a graph index created with gene...        True  
1  Graph RAG is mentioned as a system that uses t...        True  
2  Graph RAG is mentioned as achieving the best h...        True  
3  Graph RAG is mentioned as performing competiti...        True  
4  Graph RAG is mentioned as having many possibil...        True  


### Question Generation

This function takes a list of user queries and generates the next candidate questions.

In [20]:
# The question generator reuses the LLM, context builder, tokenizer and
# parameter sets that were given to the search engine.
question_generator = LocalQuestionGen(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
)

In [21]:
question_history = [
    "Tell me about Language Agent Tree Search?",
    "What is the best way to create a knowledge graph agent ?",
]
candidate_questions = await question_generator.agenerate(
    question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)



['- How does Language Agent Tree Search (LATS) integrate Monte Carlo Tree Search for language models?', '- What are the key features and benefits of the LATS framework in decision-making tasks?', '- How does LATS achieve state-of-the-art performance in programming tasks on HumanEval?', '- What role does external feedback play in the LATS algorithm for improving problem-solving?', '- Can you explain the experimental results and benchmarks used to validate the effectiveness of LATS?']
