In [None]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.

In [None]:
import os
from pathlib import Path

import pandas as pd

import graphrag.api as api
from graphrag.config.create_graphrag_config import create_graphrag_config
from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.config.models.vector_store_config import VectorStoreConfig


## Local Search Example

The local search method generates answers by combining relevant data from the AI-extracted knowledge graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g., "What are the healing properties of chamomile?").

### Configure models and load data

Use the GraphRAG API helpers to run local search against indexed outputs.


Set the `GRAPHRAG_API_KEY`, `GRAPHRAG_LLM_MODEL`, and `GRAPHRAG_EMBEDDING_MODEL` environment variables before running this notebook.


In [None]:
# Credentials and model names are read from the environment so that no secret
# is stored in the notebook. All required variables are checked up front so
# that every missing (or empty) one is reported in a single error instead of
# one bare KeyError per run.
REQUIRED_ENV_VARS = (
    "GRAPHRAG_API_KEY",
    "GRAPHRAG_LLM_MODEL",
    "GRAPHRAG_EMBEDDING_MODEL",
)
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
if missing_env_vars:
    # KeyError is kept so the failure type matches a plain os.environ[...] lookup.
    raise KeyError(
        "Set these environment variables before running this notebook: "
        + ", ".join(missing_env_vars)
    )

api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Directory that holds the indexed parquet tables and the LanceDB store.
INPUT_DIR = "./inputs/operation dulce"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

# Base names (without the ".parquet" extension) of the tables under INPUT_DIR.
COMMUNITY_TABLE = "communities"
COMMUNITY_REPORT_TABLE = "community_reports"
ENTITY_TABLE = "entities"
RELATIONSHIP_TABLE = "relationships"
COVARIATE_TABLE = "covariates"
TEXT_UNIT_TABLE = "text_units"

# Passed to `api.local_search` as `community_level`.
COMMUNITY_LEVEL = 2


#### Load indexed data

Build the GraphRAG configuration (chat model, embedding model, and vector store), then read the output parquet files generated by the indexing pipeline.


In [None]:
# LanceDB vector store, addressed by the absolute path of the "lancedb"
# directory under INPUT_DIR.
lancedb_path = Path(INPUT_DIR).resolve() / "lancedb"
vector_store_config = VectorStoreConfig(
    type="lancedb",
    db_uri=str(lancedb_path),
    container_name="default",
    overwrite=True,
)

# The chat and embedding models use the same API key and `max_retries` value;
# they differ only in model type and model name.
shared_model_settings = {"api_key": api_key, "max_retries": 20}
chat_config = LanguageModelConfig(
    type=ModelType.OpenAIChat,
    model=llm_model,
    **shared_model_settings,
)
embedding_config = LanguageModelConfig(
    type=ModelType.OpenAIEmbedding,
    model=embedding_model,
    **shared_model_settings,
)

# Assemble the settings section by section. Local search refers to the two
# models through the ids they are registered under in "models".
model_settings = {
    "default_chat_model": chat_config,
    "default_embedding_model": embedding_config,
}
local_search_settings = {
    "chat_model_id": "default_chat_model",
    "embedding_model_id": "default_embedding_model",
}
vector_store_settings = {"default_vector_store": vector_store_config}

config = create_graphrag_config(
    {
        "models": model_settings,
        "local_search": local_search_settings,
        "vector_store": vector_store_settings,
    },
    root_dir=Path("."),
)

# Read the indexed tables from INPUT_DIR, one parquet file per table.
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")

# Covariates are treated as optional: when the index has no covariates table,
# use None instead of failing on the missing file.
# NOTE(review): relies on `api.local_search` accepting `covariates=None` --
# confirm against the installed graphrag version.
covariate_path = Path(INPUT_DIR) / f"{COVARIATE_TABLE}.parquet"
covariate_df = pd.read_parquet(covariate_path) if covariate_path.exists() else None

# Row counts as a quick sanity check that every table was loaded.
covariate_count = len(covariate_df) if covariate_df is not None else "not found"
print("DataFrames loaded:")
print(f"  Communities: {len(community_df)}")
print(f"  Community reports: {len(report_df)}")
print(f"  Entities: {len(entity_df)}")
print(f"  Relationships: {len(relationship_df)}")
print(f"  Text units: {len(text_unit_df)}")
print(f"  Covariates: {covariate_count}")


#### Run local search


In [None]:
response, context = await api.local_search(
    config=config,
    entities=entity_df,
    communities=community_df,
    community_reports=report_df,
    text_units=text_unit_df,
    relationships=relationship_df,
    covariates=covariate_df,
    community_level=COMMUNITY_LEVEL,
    response_type="Multiple Paragraphs",
    query="Tell me about Agent Mercer",
)
response


#### Inspect the context data used to generate the response


In [None]:
# Preview of the "entities" table in the search context (first five rows).
context["entities"].head(5)


In [None]:
# Preview of the "relationships" table in the search context (first five rows).
context["relationships"].head(5)


In [None]:
# Preview the "reports" table; an empty DataFrame is shown when the context
# has no such key.
(context["reports"] if "reports" in context else pd.DataFrame()).head()


In [None]:
# Preview the "sources" table; an empty DataFrame is shown when the context
# has no such key.
(context["sources"] if "sources" in context else pd.DataFrame()).head()


In [None]:
# Preview the "claims" table; an empty DataFrame is shown when the context
# has no such key.
(context["claims"] if "claims" in context else pd.DataFrame()).head()


### Run local search on another query


In [None]:
question = "Tell me about Dr. Jordan Hayes"
followup_response, followup_context = await api.local_search(
    config=config,
    entities=entity_df,
    communities=community_df,
    community_reports=report_df,
    text_units=text_unit_df,
    relationships=relationship_df,
    covariates=covariate_df,
    community_level=COMMUNITY_LEVEL,
    response_type="Multiple Paragraphs",
    query=question,
)
followup_response
