# Querying

## Setup

If you haven't already, install the toolkit and dependencies using the [Setup](./00-Setup.ipynb) notebook.

### TraversalBasedRetriever

See [TraversalBasedRetriever](https://github.com/awslabs/graphrag-toolkit/blob/main/docs/lexical-graph/querying.md#traversalbasedretriever).

## Check GPU support on Ubuntu 24.04 LTS

In [20]:
import os
import torch

# Force inject paths (adapt to your system if different)
os.environ["CUDA_HOME"] = "/usr/local/cuda"
os.environ["LD_LIBRARY_PATH"] = "/usr/local/cuda/lib64:" + os.environ.get("LD_LIBRARY_PATH", "")
os.environ["PATH"] += ":/usr/local/cuda/bin"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["EXTRACTION_MODEL"] = "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
os.environ["RESPONSE_MODEL"] = "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
os.environ["EMBEDDINGS_MODEL"] = "cohere.embed-english-v3"
os.environ["EMBEDDINGS_DIMENSIONS"] = "1024"
os.environ["EXTRACTION_NUM_WORKERS"] = "2"
os.environ["EXTRACTION_BATCH_SIZE"] = "4"
os.environ["BUILD_NUM_WORKERS"] = "2"
os.environ["BUILD_BATCH_WRITE_SIZE"] = "25"
os.environ["BATCH_WRITES_ENABLED"] = "True"

# Re-check CUDA
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("CUDA Version (Torch):", torch.version.cuda)
else:
    print("GPU still not detected inside notebook")

CUDA available: True
GPU: NVIDIA GeForce RTX 3060
CUDA Version (Torch): 12.6


### Setup AWS Profile

In [21]:
# Configure AWS Profile and Region
from graphrag_toolkit.lexical_graph import GraphRAGConfig

# Assign profile and region to GraphRAGConfig
GraphRAGConfig.aws_profile = "padmin" #Optional, use if using AWS SSO
GraphRAGConfig.aws_region = "us-east-1"

### Setup model

In [22]:
# Set Claude model via Bedrock
model_id = "us.anthropic.claude-3-5-sonnet-20240620-v1:0"

### Setup BedrockConverse

In [23]:
# Configure BedrockConverse
from llama_index.llms.bedrock_converse import BedrockConverse

try:
    GraphRAGConfig.extraction_llm = BedrockConverse.from_json(f'''
    {{
        "model": "{model_id}",
        "temperature": 0.0,
        "max_tokens": 4096,
        "profile_name": "{GraphRAGConfig.aws_profile}",
        "region_name": "{GraphRAGConfig.aws_region}"
    }}
    ''')
    print(f"Successfully configured Bedrock model: {model_id}")
except Exception as e:
    print(f"Failed to initialize BedrockConverse: {str(e)}")
    raise
### Display LLM Configuration

Successfully configured Bedrock model: us.anthropic.claude-3-5-sonnet-20240620-v1:0


### Display LLM Configuration

In [24]:
# Display LLM configuration
llm = GraphRAGConfig.extraction_llm
print("LLM class:", llm.__class__.__name__)
print("Model ID:", llm.model)
print("Temperature:", llm.temperature)
print("Max tokens:", llm.max_tokens)
print("Profile:", getattr(llm, 'profile_name', None))
print("Region:", getattr(llm, 'region_name', None))

LLM class: BedrockConverse
Model ID: us.anthropic.claude-3-5-sonnet-20240620-v1:0
Temperature: 0.0
Max tokens: 4096
Profile: padmin
Region: us-east-1


In [25]:
print(f"embed_model model name = {GraphRAGConfig.embed_model.model_name}")
print(f"embed_model dimension  = {GraphRAGConfig.embed_dimensions}")

embed_model model name = amazon.titan-embed-text-v1
embed_model dimension  = 1536


### Setup GraphRag Config

In [39]:
# Setup GraphRag Config
from llama_index.embeddings.bedrock import BedrockEmbedding
import os
from graphrag_toolkit.lexical_graph.config import GraphRAGConfig

def setup_graphrag_config() -> None:
    """
    Inject BedrockEmbedding into existing GraphRAGConfig without resetting other config values.
    """
    try:
        region = GraphRAGConfig.aws_region or os.getenv("AWS_REGION", "us-east-1")
        profile = GraphRAGConfig.aws_profile or os.getenv("AWS_PROFILE")

        # Request to use v1
        requested_model_name = "amazon.titan-embed-text-v1"

        print(f"[DEBUG] Requesting BedrockEmbedding with model: {requested_model_name}")

        embed_model = BedrockEmbedding(
            model=requested_model_name,
            region=region,
            profile_name=profile
        )

        # Assign to GraphRAGConfig
        GraphRAGConfig.embed_model = embed_model
        GraphRAGConfig.embed_dimensions = 1536

        # Print actual model info after Bedrock client initialized
        actual_model_name = GraphRAGConfig.embed_model.model_name
        print(f"[CONFIRM] embed_model model name = {actual_model_name}")
        print(f"[CONFIRM] embed_model dimension  = {GraphRAGConfig.embed_dimensions}")

        if actual_model_name != requested_model_name:
            print(f"[WARNING] Bedrock returned model '{actual_model_name}', expected '{requested_model_name}'")

    except Exception as e:
        print(f"[ERROR] Failed to update GraphRAGConfig: {str(e)}")
        raise


### Setup connection with PostgreSQL

In [32]:
# Connect to PostgreSQL Vector Store
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

# PostgreSQL connection string
postgre_connection_info = 'postgresql://graphrag:graphragpass@localhost:5432/graphrag_db'

# Instantiate vector store using factory
vector_store = VectorStoreFactory.for_vector_store(postgre_connection_info)

# Optional: confirm
print(f"Vector store initialized: {vector_store}")

Vector store initialized: indexes={'chunk': PGIndex(index_name='chunk', tenant_id=TenantId(value=None), writeable=True, database='graphrag_db', schema_name='graphrag', host='localhost', port=5432, username='graphrag', password='graphragpass', dimensions=1536, embed_model=BedrockEmbedding(model_name='amazon.titan-embed-text-v1', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x78e054c5b890>, num_workers=None, profile_name='padmin', aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name=None, botocore_session=None, botocore_config=None, max_retries=10, timeout=60.0, additional_kwargs={}), enable_iam_db_auth=False, initialized=False), 'statement': PGIndex(index_name='statement', tenant_id=TenantId(value=None), writeable=True, database='graphrag_db', schema_name='graphrag', host='localhost', port=5432, username='graphrag', password='graphragpass', dimensions=1536, embed_model=BedrockEmbedding(model_name='ama

### Setup connection with FalkorDB

In [33]:
!pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/tags/v3.4.0.zip#subdirectory=lexical-graph-contrib/falkordb

Collecting https://github.com/awslabs/graphrag-toolkit/archive/refs/tags/v3.4.0.zip#subdirectory=lexical-graph-contrib/falkordb
  Using cached https://github.com/awslabs/graphrag-toolkit/archive/refs/tags/v3.4.0.zip
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone


In [34]:
# Connect to FalkorDB Graph Store
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage.graph.falkordb import FalkorDBGraphStoreFactory

# Connection string for FalkorDB
falkordb_connection_info = 'falkordb://localhost:6379'

# Register the FalkorDB backend with the factory
GraphStoreFactory.register(FalkorDBGraphStoreFactory)

# Instantiate a graph store using the factory
graph_store = GraphStoreFactory.for_graph_store(falkordb_connection_info)

# Optional: confirm initialization
print(f"FalkorDB GraphStore initialized: {graph_store}")

FalkorDB GraphStore initialized: log_formatting=RedactedGraphQueryLogFormatting() tenant_id=TenantId(value=None) endpoint_url='localhost:6379' database='graphrag' username=None password=None ssl=False


In [40]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

import nest_asyncio
nest_asyncio.apply()

# 🛠️ MUST: Setup the embeddings before anything else
setup_graphrag_config()

set_logging_config('INFO')


# Now create query engine
query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
    graph_store,
    vector_store
)

# Now you can query
response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")
print(response.response)


cannot find .env file
[DEBUG] Requesting BedrockEmbedding with model: amazon.titan-embed-text-v1
[CONFIRM] embed_model model name = amazon.titan-embed-text-v1
[CONFIRM] embed_model dimension  = 1536


TypeError: the JSON object must be str, bytes or bytearray, not dict

In [None]:
for n in response.source_nodes:
    print(n.text)

In [None]:
for n in response.source_nodes:
    print(n.metadata)

#### Set subretriever

In the example below, the `TraversalBasedRetriever` is configured with a `ChunkBasedSearch` subretriever.

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.retrieval.retrievers import ChunkBasedSearch

import nest_asyncio
nest_asyncio.apply()

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
    graph_store, 
    vector_store,
    retrievers=[ChunkBasedSearch]
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(response.response)

### SemanticGuidedRetriever

See [SemanticGuidedRetriever](https://github.com/awslabs/graphrag-toolkit/blob/main/docs/lexical-graph/querying.md#semanticguidedretriever).

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

import nest_asyncio
nest_asyncio.apply()

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, 
    vector_store
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(response.response)

#### Set subretrievers

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.retrieval.retrievers import StatementCosineSimilaritySearch, KeywordRankingSearch, SemanticBeamGraphSearch

import nest_asyncio
nest_asyncio.apply()

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, 
    vector_store,
    retrievers=[
        StatementCosineSimilaritySearch, 
        KeywordRankingSearch, 
        SemanticBeamGraphSearch
    ]
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(response.response)

#### Reranking beam search (CPU)

The example below uses a `SentenceReranker` with a `RerankingBeamGraphSearch` to rerank statements while conducting the beam search.

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.retrieval.retrievers import RerankingBeamGraphSearch, StatementCosineSimilaritySearch, KeywordRankingSearch
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker

import nest_asyncio
nest_asyncio.apply()

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

cosine_retriever = StatementCosineSimilaritySearch(
    vector_store=vector_store,
    graph_store=graph_store,
    top_k=50
)

keyword_retriever = KeywordRankingSearch(
    vector_store=vector_store,
    graph_store=graph_store,
    max_keywords=10
)

reranker = SentenceReranker(
    batch_size=128
)

beam_retriever = RerankingBeamGraphSearch(
    vector_store=vector_store,
    graph_store=graph_store,
    reranker=reranker,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    max_depth=8,
    beam_width=100
)

query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, 
    vector_store,
    retrievers=[
        cosine_retriever,
        keyword_retriever,
        beam_retriever
    ]
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(response.response)

#### Reranking beam search (GPU)

The example below uses a `BGEReranker` with a `RerankingBeamGraphSearch` to rerank statements while conducting the beam search.

There will be a delay the first time this runs while the reranker downloads tensors.

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.retrieval.retrievers import RerankingBeamGraphSearch, StatementCosineSimilaritySearch, KeywordRankingSearch
from graphrag_toolkit.lexical_graph.retrieval.post_processors.bge_reranker import BGEReranker

import nest_asyncio
nest_asyncio.apply()

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

cosine_retriever = StatementCosineSimilaritySearch(
    vector_store=vector_store,
    graph_store=graph_store,
    top_k=50
)

keyword_retriever = KeywordRankingSearch(
    vector_store=vector_store,
    graph_store=graph_store,
    max_keywords=10
)

reranker = BGEReranker(
    gpu_id=0,
    batch_size=128
)

beam_retriever = RerankingBeamGraphSearch(
    vector_store=vector_store,
    graph_store=graph_store,
    reranker=reranker,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    max_depth=8,
    beam_width=100
)

query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, 
    vector_store,
    retrievers=[
        cosine_retriever,
        keyword_retriever,
        beam_retriever
    ]
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(response.response)

#### Post-processors 

The example below uses a `StatementDiversityPostProcessor`, `SentenceReranker` and `StatementEnhancementPostProcessor`.

  - `SentenceReranker` - Reranks statements using the `mixedbread-ai/mxbai-rerank-xsmall-v1` model. 

  - `StatementEnhancementPostProcessor` - Enhances statements by using chunk context and an LLM to improve content while preserving original metadata.

  - `StatementDiversityPostProcessor` - Removes similar statements using TF-IDF similarity with a default threshold of 0.975 to ensure diversity in the processed nodes.

Before running `StatementDiversityPostProcessor` for the first time, load the following package:

```
python -m spacy download en_core_web_sm
```

If you're running on a GPU device, you can replace the `SentenceReranker` with a `BGEReranker`, which reranks statements using the ``BAAI/bge-reranker-v2-minicpm-layerwise`` model.

In [None]:
!python -m spacy download en_core_web_sm

In [None]:
%reload_ext dotenv
%dotenv

from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker, StatementDiversityPostProcessor, StatementEnhancementPostProcessor
import os

import nest_asyncio
nest_asyncio.apply()

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, 
    vector_store,
    post_processors=[
        SentenceReranker(), 
        StatementDiversityPostProcessor(), 
        StatementEnhancementPostProcessor()
    ]
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(response.response)