In [1]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.

In [3]:
import os

import pandas as pd
import tiktoken
from IPython.display import Markdown, display

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
    LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

## Local Search Example

The local search method generates answers by combining relevant data from the AI-extracted knowledge graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g., What are the healing properties of chamomile?).

### Load text units and graph data tables as context for local search

- In this test we first load the indexing outputs from parquet files into dataframes, then convert these dataframes into collections of data objects aligned with the knowledge model.

### Load tables to dataframes

In [1]:
# Directory that holds the indexing pipeline's parquet outputs.
# Set the GRAPHRAG_INPUT_DIR environment variable to point the notebook at a
# different run (e.g. "./inputs/operation dulce") without editing this cell;
# when the variable is unset the original location is used.
INPUT_DIR = os.environ.get(
    "GRAPHRAG_INPUT_DIR", "/teamspace/studios/this_studio/graphrag/mw/output"
)
# LanceDB database directory, located inside the indexing output directory.
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

# Names of the parquet tables (file name without the ".parquet" suffix).
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"
# Community level handed to the entity and community-report readers.
COMMUNITY_LEVEL = 2

#### Read entities

In [4]:
# Parquet locations of the two entity-related tables.
nodes_parquet = f"{INPUT_DIR}/{ENTITY_TABLE}.parquet"
entity_embeddings_parquet = f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"

# The nodes table supplies the community and degree data; it is combined with
# the entity table to build the entity objects at the configured community level.
entity_df = pd.read_parquet(nodes_parquet)
entity_embedding_df = pd.read_parquet(entity_embeddings_parquet)
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

# Open the entity-description collection in the LanceDB database at LANCEDB_URI.
# To connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="default-entity-description"
)
description_embedding_store.connect(db_uri=LANCEDB_URI)

entity_count = len(entity_df)
print(f"Entity count: {entity_count}")
entity_df.head()

Entity count: 946


Unnamed: 0,id,human_readable_id,title,community,level,degree,x,y
0,32472dc7-96c2-48e5-a973-b3380c303ec0,0,QUOTE - B00321.1-1,149,0,12,12.317203,2.930166
1,921613a9-2858-47ce-9107-9c17e207c599,1,"APP - NITRIC OXIDE, NITROUS OXIDE AND NITROGEN...",149,0,1,11.875115,2.526305
2,92505f8e-2372-4014-a0f2-63a77aaff3e9,2,CELL - ELECTRIC - 100 ºC - 128.0 MM - 316 SS -...,-1,0,6,-7.755307,19.499149
3,f39ec04b-d133-4460-8655-4500e3985324,3,OPT - SRC IR - DET BAF2 W/SF - LENS CAF2,-1,0,39,8.116982,10.099446
4,0ea7ba99-89c5-470e-b32b-57bfcb59c4b9,4,FILT - 5.0 - REF,206,0,13,2.139669,6.903372


#### Read relationships

In [5]:
# Load the relationship table, then convert its rows into relationship objects.
relationships_parquet = f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet"
relationship_df = pd.read_parquet(relationships_parquet)
relationships = read_indexer_relationships(relationship_df)

relationship_count = len(relationship_df)
print(f"Relationship count: {relationship_count}")
relationship_df.head()

Relationship count: 2846


Unnamed: 0,id,human_readable_id,source,target,description,weight,combined_degree,text_unit_ids
0,d73cfa23-df02-4421-a0ca-e256c9a3f430,0,QUOTE - B00321.1-1,FILT - 5.0 - REF,The filter 5.0 - Ref is specified in the B0032...,8.0,25,[40b382f44c7cc35d781b804b762ec45da19c86b7f0cd1...
1,e04df03d-4f02-4f00-8274-cd1e553e2e21,1,QUOTE - B00321.1-1,FILT - 5.41 - NO - MEAS,The filter 5.41 - NO Meas is specified in the ...,8.0,15,[40b382f44c7cc35d781b804b762ec45da19c86b7f0cd1...
2,388c59e6-949d-44ce-b77e-01c58792c1b6,2,QUOTE - B00321.1-1,FILT - 3.42 - NO2 - MEAS,The filter 3.42 - NO2 Meas is specified in the...,8.0,14,[40b382f44c7cc35d781b804b762ec45da19c86b7f0cd1...
3,8d4206a4-0f57-4075-9d1c-907e4e7dbae6,3,QUOTE - B00321.1-1,FILT - 4.46 - N2 - MEAS,The filter 4.46 - N2 Meas is specified in the ...,8.0,14,[40b382f44c7cc35d781b804b762ec45da19c86b7f0cd1...
4,9d2fd0af-aac4-4e99-a033-71f7490f881f,4,QUOTE - B00321.1-1,FILT - 4.76 - CO - COMP,The filter 4.76 - CO Comp is specified in the ...,8.0,14,[40b382f44c7cc35d781b804b762ec45da19c86b7f0cd1...


In [34]:
# NOTE: covariates are turned off by default, because they generally need prompt tuning to be valuable
# Please see the GRAPHRAG_CLAIM_* settings
# The covariate table is therefore optional: it is loaded only when the
# indexing run actually wrote it, otherwise local search runs without claims.
covariate_path = f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet"
if os.path.exists(covariate_path):
    covariate_df = pd.read_parquet(covariate_path)
    claims = read_indexer_covariates(covariate_df)
    print(f"Claim records: {len(claims)}")
    covariates = {"claims": claims}
else:
    print(f"No covariate table found at {covariate_path}; continuing without claims")
    covariates = None

#### Read community reports

In [7]:
# Load the community-report table; the reader also receives the nodes
# dataframe and the community level chosen in the configuration cell.
reports_parquet = f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
report_df = pd.read_parquet(reports_parquet)
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

report_count = len(report_df)
print(f"Report records: {report_count}")
report_df.head()

Report records: 207


Unnamed: 0,id,human_readable_id,community,parent,level,title,summary,full_content,rank,rank_explanation,findings,full_content_json,period,size
0,e642ef3d45094fe8adcb2353cd55ea7d,0,0,-1.0,0,Nitrous Oxide Measurement Applications,The community focuses on the measurement appli...,# Nitrous Oxide Measurement Applications\n\nTh...,9.0,The dataset provides significant insights into...,[{'explanation': 'The dataset reveals two dist...,"{\n ""title"": ""Nitrous Oxide Measurement App...",2025-02-21,4.0
1,7a7d7daacffb4746b3b3e24f01738d40,1,1,,0,APP - Nitrogen Dioxide Measurement,The application name is 'Nitrogen Dioxide Meas...,# APP - Nitrogen Dioxide Measurement\n\nThe ap...,10.0,The chemical stream composition directly align...,[{'explanation': 'The application is focused o...,"{\n ""title"": ""APP - Nitrogen Dioxide Measur...",,
2,41460c498df54aeaadf6e0317a40b578,2,2,,0,APP - Nitric Oxide Measurements,The application focuses on measurements of Nit...,# APP - Nitric Oxide Measurements\n\nThe appli...,9.0,The dataset provides significant insights into...,[{'explanation': 'The dataset includes two dis...,"{\n ""title"": ""APP - Nitric Oxide Measuremen...",,
3,e167bf6fd4b444c189a520e1203c0667,3,3,-1.0,0,APP - Ethylene Measurements,The community focuses on various measurements ...,# APP - Ethylene Measurements\n\nThe community...,9.0,The dataset provides significant insights into...,[{'explanation': 'The dataset reveals a range ...,"{\n ""title"": ""APP - Ethylene Measurements"",...",2025-02-21,49.0
4,4b0f8dd074bb478fb522e2f29b59789c,4,4,-1.0,0,Community Analysis of Hydrogen Matrix Interferers,This report analyzes the community of hydrogen...,# Community Analysis of Hydrogen Matrix Interf...,9.5,The dataset provides extensive insights into t...,[{'explanation': 'The dataset reveals a wide r...,"{\n ""title"": ""Community Analysis of Hydroge...",2025-02-21,324.0


#### Read text units

In [10]:
# Load the text-unit table, then convert its rows into text-unit objects.
text_units_parquet = f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet"
text_unit_df = pd.read_parquet(text_units_parquet)
text_units = read_indexer_text_units(text_unit_df)

text_unit_count = len(text_unit_df)
print(f"Text unit records: {text_unit_count}")
text_unit_df.head()

Text unit records: 87


Unnamed: 0,id,human_readable_id,text,n_tokens,document_ids,entity_ids,relationship_ids
0,40b382f44c7cc35d781b804b762ec45da19c86b7f0cd1f...,1,Quote_ID: B00321.1-1\nApplication_Name: Nitric...,451,[034c2bec9d00a5ff35a0b3d79956f3786e099988ef464...,"[32472dc7-96c2-48e5-a973-b3380c303ec0, 921613a...","[d73cfa23-df02-4421-a0ca-e256c9a3f430, e04df03..."
1,20d8d2e2b060ad2587e1f358cfc398cb056d09c5f77324...,2,Quote_ID: A51271.3-7\nApplication_Name: Ethyle...,374,[100c527aa206420d9c2a825b817fcf37bd525fc6c7f9d...,"[dfceaf12-d521-4522-b01a-b28f71d8a5fd, bbe6f74...","[d175fc1d-55ff-4e53-ad6e-50345f778045, ca5263c..."
2,005de39986d0a5f4843eff813d44f7cdea01b342e13575...,3,Quote_ID: B30113.3-4\nApplication_Name: Water ...,419,[13831e7ed4a987710f11bb13ffbda85c32a7cb54b4f78...,"[f39ec04b-d133-4460-8655-4500e3985324, fc2b5fc...","[38441c73-b421-45f7-8b16-cc267dbdb0d3, fc671d9..."
3,6bff07af49847cbad6c73101dc51f1019673c0b8775747...,4,Quote_ID: B64046.2-2\nApplication_Name: Water ...,538,[14be746eb8279686231e20619eff3f1ba0cec70e7736b...,"[12b6ff43-2b19-435e-8d33-bab2851ffa5c, fc31432...","[3c316533-dd5b-4afa-9b83-07c8dd26d009, eb18fd7..."
4,16aa0717cf1da2f18e43eae135a8c09e98fd830c1581b4...,5,Quote_ID: B67138.2-1\nApplication_Name: Water ...,363,[17c3216617aa278899d19b97c2b62d958199e0d23efc9...,"[32a09a8e-3a2f-4172-837c-ecb50b06e27c, 799ebdd...","[830e1b7b-5063-400a-b102-38bb5bb59ef8, 0552f9d..."


In [40]:
# Location of the .env file that holds the GraphRAG credentials.
ENV_FILE = "/teamspace/studios/this_studio/graphrag/mw/.env"


def load_env_file(path):
    """Copy KEY=VALUE pairs from a dotenv-style file into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. A
    leading ``export`` is dropped, whitespace around the key and the value is
    stripped, and one pair of matching surrounding quotes is removed from the
    value. Values read from the file overwrite variables that are already set.

    Args:
        path: Path of the file to read.

    Returns:
        dict: The variables that were loaded; empty when the file does not exist.
    """
    loaded = {}
    if not os.path.isfile(path):
        # The key may already be exported in the environment, so a missing
        # file is not treated as an error.
        return loaded
    with open(path, "r", encoding="utf-8") as env_file:
        for raw_line in env_file:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            # Split on the first "=" only, so values may themselves contain "=".
            key, value = line.split("=", 1)
            key = key.strip()
            if key.startswith("export "):
                key = key[len("export "):].strip()
            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            if key:
                loaded[key] = value
    os.environ.update(loaded)
    return loaded


load_env_file(ENV_FILE)

api_key = os.environ.get("GRAPHRAG_API_KEY")
if not api_key:
    print(f"WARNING: GRAPHRAG_API_KEY is not set (checked the environment and {ENV_FILE})")
llm_model = "gpt-4o-mini"
llm_api_version = "2024-08-01-preview"
embedding_model = "text-embedding-3-large"
embedding_api_version = "2023-05-15"
api_base = "https://cog2-trnz2cbjn4ofs.openai.azure.com/"

In [41]:
# tiktoken encoding that is handed to the context builder and the search
# engine as their token_encoder.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Connection settings shared by the chat client and the embedding client.
azure_client_kwargs = {
    "api_key": api_key,
    "api_base": api_base,
    "api_type": OpenaiApiType.AzureOpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    "max_retries": 20,
}

# Chat model client.
llm = ChatOpenAI(
    model=llm_model,
    api_version=llm_api_version,
    **azure_client_kwargs,
)

# Embedding client; the deployment name is the same string as the model name.
text_embedder = OpenAIEmbedding(
    model=embedding_model,
    deployment_name=embedding_model,
    api_version=embedding_api_version,
    **azure_client_kwargs,
)

### Create local search context builder

In [42]:
# Assemble the mixed-context builder from the objects created in the cells above.
context_builder = LocalSearchMixedContext(
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    # set this to None if you did not run covariates during indexing
    covariates=covariates,
    entity_text_embeddings=description_embedding_store,
    # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)

### Create local search engine

In [43]:
# Settings handed to the search engine as context_builder_params.
# The sum of text_unit_prop and community_prop should be <= 1; the remaining
# proportion of the context window is dedicated to entities and relationships.
local_context_params = dict(
    # proportion of context window dedicated to related text units
    text_unit_prop=0.4,
    # proportion of context window dedicated to community reports
    community_prop=0.2,
    # maximum number of turns to include in the conversation history
    conversation_history_max_turns=5,
    # if True, only include user queries in the conversation history
    conversation_history_user_turns_only=True,
    # number of related entities to retrieve from the entity description embedding store
    top_k_mapped_entities=10,
    # controls the number of out-of-network relationships to pull into the context window
    top_k_relationships=10,
    # if True, include the entity rank in the entity table in the context window
    # (default entity rank = node degree)
    include_entity_rank=True,
    # if True, include the relationship weight in the context window
    include_relationship_weight=True,
    # if True, include the community rank in the context window
    include_community_rank=False,
    # if True, return a set of dataframes containing all candidate
    # entity/relationship/covariate records that could be relevant. Not all of
    # these records will be included in the context window; the "in_context"
    # column indicates whether a record is included.
    return_candidate_context=False,
    # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    # maximum number of tokens to use for the context window; change this based
    # on the token limit of your model (with an 8k limit, a good setting could be 5000)
    max_tokens=32_000,
)

# Settings handed to the search engine as llm_params.
llm_params = dict(
    # change this based on the token limit of your model
    # (with an 8k limit, a good setting could be 1000-1500)
    max_tokens=8_000,
    temperature=0.0,
)

In [44]:
# Wire the chat model, context builder and parameter dictionaries into the
# local search engine.
search_engine = LocalSearch(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
    # free form text describing the response type and format, can be anything,
    # e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
    response_type="multiple paragraphs",
)

### Run local search on sample queries

In [55]:
from IPython.display import Markdown, display
result = await search_engine.asearch("Give a summary of the top 5 largest applications. For each, report an exhaustive list of all the quotes. List from largest to smallest.")
display(Markdown(result.response))
result.context_data["entities"].head()
result.context_data["relationships"].head()

# Summary of the Top 5 Largest Applications

The following summary provides an overview of the top five largest applications based on the complexity and number of components involved. Each application is accompanied by an exhaustive list of all associated quotes.

## 1. **APP - MALEIC ANHYDRIDE REACTOR INLET**
This application encompasses various measurements of gas concentrations within a reactor inlet environment. It is characterized by a high complexity involving multiple components.

### Quotes:
- **Quote ID B09871.5-2**: Water (2.4% nominal, 5.0% maximum) and n-Butane (1.7% nominal, 2.0% maximum) in a Nitrogen matrix (73.0%).
- **Quote ID B22224.2-4**: n-Butane (1.9% nominal, 3.0% maximum) and Water (2.35% nominal, 5.0% maximum) in a Nitrogen matrix (75.65%).
- **Quote ID B55598.5-1**: CO (0.1% nominal, 1.0% maximum), CO2 (0.1% nominal, 1.0% maximum), n-Butane (1.8% nominal, 3.0% maximum), and Water (4.0% nominal, 5.0% maximum) in a Nitrogen (73.0%) and Oxygen (20.0%) matrix with Argon (1.0%).
- **Quote ID B55598.5-6**: Similar measurements to B55598.5-1.
- **Quote ID B59078.4-8**: Water (3.0% nominal, 6.0% maximum) and n-Butane (2.0% nominal, 3.0% maximum) in a Nitrogen matrix (76.0%).

## 2. **APP - MALEIC ANHYDRIDE REACTOR OUTLET**
This application involves the measurement of various components in a reactor outlet, showcasing a diverse range of chemical measurements.

### Quotes:
- **Quote ID B11759.11-4**: Maleic anhydride (3.38% nominal, 4.0% maximum), CO (1.37% nominal, 3.0% maximum), CO2 (1.66% nominal, 3.0% maximum), and n-Butane (0.68% nominal, 1.0% maximum) in a Nitrogen matrix (72.41%) with Water (6.18%) compensation.
- **Quote ID B49860.1-1**: Maleic anhydride (1.07% nominal, 1.5% maximum), CO (1.3% nominal, 2.0% maximum), CO2 (1.2% nominal, 2.0% maximum), and n-Butane (0.29% nominal, 1.0% maximum) in a Nitrogen matrix (72.74%) with Water (11.42%) compensation.
- **Quote ID B64411.1-8**: Maleic anhydride (1.04% nominal, 1.5% maximum), CO (1.1% nominal, 2.0% maximum), and CO2 (0.98% nominal, 2.0% maximum) in a Nitrogen (74.37%) and Oxygen (13.46%) matrix with n-Butane (0.27%) and Water (8.73%) compensations.

## 3. **APP - CO, CO2, ETHYLENE AND EDC IN MATRIX**
This application focuses on the measurement of multiple gases in an unknown matrix, highlighting the complexity of the chemical environment.

### Quotes:
- **Quote ID A88597.5-1**: CO (70.0% nominal, 100.0% maximum), CO2 (8.0% nominal, 20.0% maximum), Ethylene (20.0% nominal, 60.0% maximum), and 1,2-Dichloroethane (2.0% nominal, 5.0% maximum).

## 4. **APP - HCL IN CHLORINE**
This application is dedicated to measuring hydrochloric acid concentrations within a chlorine matrix, emphasizing the need for precise instrumentation.

### Quotes:
- **Quote ID B06569.1-1**: HCl (0.3% nominal, 5.0% maximum) in a chlorine matrix (99.0%) with trace gases.
- **Quote ID B20169.4-2**: HCl (0.2% nominal, 0.4% maximum) in a chlorine matrix (94.8%) with 2.0% water.
- **Quote ID B25551.1-1**: HCl (0.0% nominal, 10.0% maximum) in a chlorine matrix (99.0%) with temperature compensation.

## 5. **APP - N-BUTANE IN NITROGEN**
This application involves the measurement of n-Butane concentrations in a nitrogen matrix, showcasing the importance of maintaining precise chemical compositions.

### Quotes:
- **Quote ID B55598.5-26**: n-Butane (1.8% nominal, 3.0% maximum) in a Nitrogen matrix (73.0%) with Water (4.0%) compensation.
- **Quote ID B55598.5-27**: Similar measurements to B55598.5-26.
- **Quote ID B55598.5-28**: Similar measurements to B55598.5-26.

These applications reflect the complexity and specificity required in industrial chemical measurements, ensuring accurate monitoring and compliance in various processes. Each application is supported by multiple quotes that detail the specific measurements and conditions involved.

Unnamed: 0,id,source,target,description,weight,links,in_context
0,2790,QUOTE - B55598.5-28,QUOTE - B55598.5-26,Similarity edge,8.0,1,True
1,2405,QUOTE - B50984.5-2,QUOTE - B50984.5-4,Similarity edge,8.0,1,True
2,244,QUOTE - B25551.1-1,APP - HCL IN CHLORINE,This application name is HCl in Chlorine for t...,10.0,1,True
3,2064,APP - MALEIC ANHYDRIDE REACTOR INLET,APP - MALEIC ANHYDRIDE REACTOR OUTLET,Similarity edge,6.0,1,True
4,974,QUOTE - B55598.5-28,OPT - SRC IR - DET BAF2 W/SF - LENS CAF2,The B55598.5-28 quote specifies an IR light so...,5.0,8,True


In [51]:
question = "List all the Water in EDC quotes. Be exhaustive."
result = await search_engine.asearch(question)
display(Markdown(result.response))

## Water in EDC Quotes

The following are the detailed quotes related to the measurement of water in Ethylene Dichloride (EDC) matrices. Each quote specifies the nominal and maximum concentrations of water, along with the composition of the EDC matrix and any compensatory components used.

1. **Quote ID: A84671.1-3**
   - **Water Measurement**: 0.0005% nominal, 0.04% maximum
   - **EDC Matrix Composition**: 96.7% EDC, with Benzene (2.86%) and Trichloroethylene (0.33%) compensations.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in an EDC matrix with specific compensations [Data: Reports (172); Entities (309, 308); Sources (25)].

2. **Quote ID: B09995.5-2**
   - **Water Measurement**: 0.01% nominal, 0.01% maximum
   - **EDC Matrix Composition**: 99.7% EDC, with Chloroform (0.005%) and no Benzene compensation.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in a nearly pure EDC matrix [Data: Reports (172); Entities (309, 511); Sources (44)].

3. **Quote ID: B13389.3-1**
   - **Water Measurement**: 0.002% nominal, 0.01% maximum
   - **EDC Matrix Composition**: 99.7% EDC.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in an EDC matrix [Data: Reports (172); Entities (309, 493); Sources (42)].

4. **Quote ID: B29776.2-1**
   - **Water Measurement**: 0.005% nominal, 0.01% maximum
   - **EDC Matrix Composition**: 99.75% EDC, with Benzene (0.02%) compensation.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in an EDC matrix [Data: Reports (172); Entities (309, 587); Sources (39)].

5. **Quote ID: B30237.9-1**
   - **Water Measurement**: 0.003% nominal, 0.01% maximum
   - **EDC Matrix Composition**: 100.0% EDC.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in a pure EDC matrix [Data: Reports (172); Entities (309, 587); Sources (51)].

6. **Quote ID: B50984.5-2**
   - **Water Measurement**: 0.0036% nominal, 0.01% maximum
   - **EDC Matrix Composition**: 99.26% EDC, with various compensations.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in an EDC matrix with multiple components [Data: Reports (172); Entities (309, 419); Sources (35)].

7. **Quote ID: B61650.7-4**
   - **Water Measurement**: 0.001% nominal, 0.01% maximum
   - **EDC Matrix Composition**: 99.22% EDC, with Benzene (0.234%) compensation.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in an EDC matrix [Data: Reports (172); Entities (309, 742); Sources (68)].

8. **Quote ID: B80137.1-1**
   - **Water Measurement**: 0.001% nominal, 0.005% maximum
   - **EDC Matrix Composition**: 99.11% EDC.
   - **Temperature Compensation**: Yes
   - **Application Summary**: Water measurement in an EDC matrix [Data: Reports (172); Entities (309, 926); Sources (84)].

These quotes illustrate the complexity and specificity of water measurement in EDC environments, highlighting the importance of precise control and compensation in industrial applications. Each quote is supported by various data references that provide further context and details about the measurements and configurations used.

#### Inspecting the context data used to generate the response

In [52]:
# First rows of the entity table from the most recent search result.
entities_context = result.context_data["entities"]
entities_context.head()

Unnamed: 0,id,entity,description,number of relationships,in_context
0,309,APP - WATER IN EDC,"The entity ""APP - WATER IN EDC"" encompasses va...",9,True
1,791,QUOTE - B50984.5-4,Quote ID B50984.5-4 application_name is Water ...,15,True
2,419,QUOTE - B50984.5-2,Quote ID B50984.5-2 application_name is Water ...,19,True
3,926,QUOTE - B80137.1-1,Quote ID B80137.1-1 application_name is Water ...,12,True
4,308,QUOTE - A84671.1-3,Quote ID A84671.1-3 application_name is Water ...,18,True


In [53]:
# First rows of the relationship table from the most recent search result.
relationships_context = result.context_data["relationships"]
relationships_context.head()

Unnamed: 0,id,source,target,description,weight,links,in_context
0,2405,QUOTE - B50984.5-2,QUOTE - B50984.5-4,Similarity edge,8.0,2,True
1,543,QUOTE - B50984.5-2,APP - WATER IN EDC,This application name is Water in EDC for the ...,10.0,2,True
2,610,QUOTE - B29776.2-1,APP - WATER IN EDC,This application name is Water in EDC for the ...,10.0,1,True
3,398,QUOTE - A84671.1-3,APP - WATER IN EDC,This application name is Water in EDC for the ...,10.0,1,True
4,679,QUOTE - B09995.5-2,APP - WATER IN EDC,This application name is Water in EDC for the ...,10.0,1,True


In [None]:
# Community reports are only present in the context data for some queries.
# A bare expression inside an `if` block is not rendered by the notebook, so
# the table is passed to display() explicitly.
if "reports" in result.context_data:
    display(result.context_data["reports"].head())

KeyError: 'reports'

In [None]:
# Guarded like the neighbouring inspection cells, so a result without a
# "sources" table does not raise a KeyError.
if "sources" in result.context_data:
    display(result.context_data["sources"].head())

In [54]:
# The "claims" table is only inspected when it is present in the context data.
# display() is used so the table is rendered the same way as the other
# context tables instead of as plain text.
if "claims" in result.context_data:
    display(result.context_data["claims"].head())

### Question Generation

This function takes a list of user queries and generates the next candidate questions.

In [None]:
# The question generator is built from the same chat model, context builder
# and parameter dictionaries as the search engine.
question_generator = LocalQuestionGen(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
)

In [None]:
question_history = [
    "Tell me about Agent Mercer",
    "What happens in Dulce military base?",
]
candidate_questions = await question_generator.agenerate(
    question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)