In [1]:
import os
import warnings

import pandas as pd
import tiktoken

from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

In [2]:
# Folder that holds the "<table>.parquet" files read by the loading cell.
INPUT_DIR = "./ragtest/output"
# LanceDB location, a sub-folder of INPUT_DIR.
LANCEDB_URI = INPUT_DIR + "/lancedb"

# Community level handed to the graphrag indexer adapters when the tables are read.
COMMUNITY_LEVEL = 2

# Table base names; the ".parquet" suffix is appended where a table is loaded.
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
RELATIONSHIP_TABLE = "create_final_relationships"
COVARIATE_TABLE = "create_final_covariates"
TEXT_UNIT_TABLE = "create_final_text_units"

In [3]:
# Resolve the three parquet files needed for global search.
entity_path = f"{INPUT_DIR}/{ENTITY_TABLE}.parquet"
report_path = f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet"
entity_embedding_path = f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"

# Read them in the same order as the paths are listed: nodes, reports, entities.
entity_df, report_df, entity_embedding_df = map(
    pd.read_parquet, (entity_path, report_path, entity_embedding_path)
)

# Hand the raw frames to the graphrag indexer adapters together with COMMUNITY_LEVEL.
entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

# Compare the raw report count with what the adapter returned for this level.
print(f"Total report count: {len(report_df)}")
print(
    f"Report count after filtering by community level {COMMUNITY_LEVEL}: {len(reports)}"
)

# Keep this as the last expression so the notebook renders a preview of the frame.
report_df.head()

Total report count: 25
Report count after filtering by community level 2: 19


Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,10,# CM_ATTEN_CHAR.RSP and TT_MATCH_RESPONSE_TIME...,1,9.0,CM_ATTEN_CHAR.RSP and TT_MATCH_RESPONSE_TIMER ...,The rating is high due to the critical role th...,The community revolves around the CM_ATTEN_CHA...,[{'explanation': 'The CM_ATTEN_CHAR.RSP messag...,"{\n ""title"": ""CM_ATTEN_CHAR.RSP and TT_MATC...",9261c28a-60ff-4730-886c-8be1f32eb54d
1,11,# CM_SLAC_PARM Communication and Test Cases\n\...,1,9.0,CM_SLAC_PARM Communication and Test Cases,The impact rating is high due to the critical ...,The community revolves around the CM_SLAC_PARM...,[{'explanation': 'CM_SLAC_PARM.CNF is a confir...,"{\n ""title"": ""CM_SLAC_PARM Communication an...",42c5cffb-7c8e-42cd-a2ac-c3112238b478
2,12,# ISO 15118-3:2015(E) and HomePlug Green PHY\n...,1,9.0,ISO 15118-3:2015(E) and HomePlug Green PHY,The rating is high due to the critical nature ...,The community revolves around the ISO 15118-3:...,[{'explanation': 'ISO 15118-3:2015(E) is the c...,"{\n ""title"": ""ISO 15118-3:2015(E) and HomeP...",8e120bba-dd68-422f-90ee-29b3070792ee
3,13,# ISO 15118-3:2015(E) and EVSE Communication N...,1,9.0,ISO 15118-3:2015(E) and EVSE Communication Net...,The rating is high due to the critical nature ...,The community revolves around the ISO 15118-3:...,[{'explanation': 'The SLAC Validation Process ...,"{\n ""title"": ""ISO 15118-3:2015(E) and EVSE ...",64b818cd-d2cd-420e-9985-2a7bb17c981a
4,14,# Electric Vehicle Communication Interfaces un...,1,9.0,Electric Vehicle Communication Interfaces unde...,The rating is high due to the critical importa...,The community revolves around the Electric Veh...,[{'explanation': 'The Electric Vehicle (EV) is...,"{\n ""title"": ""Electric Vehicle Communicatio...",a059abe2-9fd9-438b-b5c9-1c39f1747776


In [4]:
import tiktoken
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType

# Connection settings for the Azure OpenAI chat and embedding clients.
#
# The API key is read from the GRAPHRAG_API_KEY environment variable rather than
# being written into the notebook, so the secret is never stored in the saved
# file. Export the variable before starting the kernel.
api_key = os.environ.get("GRAPHRAG_API_KEY")
if not api_key:
    # Surface the problem in this cell's output instead of as an opaque failure
    # when the first request is sent.
    warnings.warn(
        "GRAPHRAG_API_KEY is not set; the OpenAI clients created from these "
        "settings will not have an API key."
    )

llm_model = "gpt-4o-2024-05-13"  # Example model name
embedding_model = "text-embedding-ada-002"  # Example embedding model

# NOTE(review): both values are full request URLs (deployment path plus an
# api-version query string), not just the resource root. Confirm this is what
# the clients expect as `api_base`.
azure_llm_api_base = "https://test-text-embedding.openai.azure.com/openai/deployments/gpt-4o-2024-05-13/chat/completions?api-version=2023-03-15-preview"  # Azure API Base
azure_enc_api_base = "https://test-text-embedding.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2023-05-15"

azure_llm_deployment_name = "gpt-4o-2024-05-13"  # Deployment name for the chat model
azure_enc_deployment_name = "text-embedding-ada-002"  # Deployment name for the embedding model



In [5]:
# API version and retry budget shared by both Azure OpenAI clients below.
AZURE_API_VERSION = "2024-02-15-preview"
MAX_RETRIES = 20

# Chat client; this is the `llm` handed to GlobalSearch.
# NOTE(review): no deployment_name is passed here; the chat deployment only
# appears inside azure_llm_api_base. Confirm that is intended.
llm = ChatOpenAI(
    api_key=api_key,
    model=llm_model,
    api_type=OpenaiApiType.AzureOpenAI,  # OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
    max_retries=MAX_RETRIES,
    api_base=azure_llm_api_base,
    api_version=AZURE_API_VERSION,
)

# Tokenizer passed to the context builder and to the search engine.
# NOTE(review): tiktoken maps gpt-4o models to "o200k_base", so counts made with
# "cl100k_base" are presumably approximate for this model. Confirm before
# relying on the token limits.
token_encoder = tiktoken.get_encoding("cl100k_base")

# Embedding client. The deployment name comes from the dedicated
# azure_enc_deployment_name setting instead of reusing the model-name variable,
# so the two can differ without silently targeting the wrong deployment.
text_embedder = OpenAIEmbedding(
    api_key=api_key,
    api_base=azure_enc_api_base,
    api_type=OpenaiApiType.AzureOpenAI,
    model=embedding_model,
    deployment_name=azure_enc_deployment_name,
    max_retries=MAX_RETRIES,
    api_version=AZURE_API_VERSION,
)

In [6]:
# Context builder over the community reports; GlobalSearch receives it as its
# `context_builder` argument.
context_builder = GlobalCommunityContext(
    token_encoder=token_encoder,
    community_reports=reports,
    # Leave this at its default (None) if community weights should not be used for ranking.
    entities=entities,
)

In [7]:
# Options passed to GlobalSearch as `context_builder_params`.
context_builder_params = dict(
    # False -> use the full community reports; True -> use the short community summaries.
    use_community_summary=False,
    shuffle_data=True,
    include_community_rank=True,
    min_community_rank=0,
    community_rank_name="rank",
    include_community_weight=True,
    community_weight_name="occurrence weight",
    normalize_community_weight=True,
    # Tune to the model's token limit (for a model with an 8k limit, 5000 is a good setting).
    max_tokens=12000,
    context_name="Reports",
)

# LLM call settings passed to GlobalSearch as `map_llm_params`.
map_llm_params = dict(
    max_tokens=1000,
    temperature=0.0,
    response_format=dict(type="json_object"),
)

# LLM call settings passed to GlobalSearch as `reduce_llm_params`.
reduce_llm_params = dict(
    # Tune to the model's token limit (for a model with an 8k limit, 1000-1500 is a good setting).
    max_tokens=2000,
    temperature=0.0,
)

In [9]:
# Assemble the global search engine from the chat client, the community context
# builder and the parameter dictionaries prepared in the earlier cells.
search_engine = GlobalSearch(
    llm=llm,
    token_encoder=token_encoder,
    context_builder=context_builder,
    context_builder_params=context_builder_params,
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    # Tune to the model's token limit (for a model with an 8k limit, 5000 is a good setting).
    max_data_tokens=12000,
    # True adds an instruction encouraging the LLM to use general knowledge in the
    # response, which may increase hallucinations but can be useful in some use cases.
    allow_general_knowledge=False,
    # Set to False if the LLM model does not support JSON mode.
    json_mode=True,
    concurrent_coroutines=32,
    # Free-form description of the response type and format, e.g. prioritized list,
    # single paragraph, multiple paragraphs, multiple-page report.
    response_type="multiple paragraphs",
)

In [10]:
result = await search_engine.asearch(
    "what is CM_SLAC_PARM.REQ?"
)

print(result.response)

### Understanding CM_SLAC_PARM.REQ

The **CM_SLAC_PARM.REQ** is a crucial request message used within the communication process, particularly in the context of electric vehicle (EV) charging systems. This message is integral to initiating the communication sequence between the EV and the charging station, ensuring that the necessary parameters are requested from the vehicle.

### Role in Communication

The primary function of the CM_SLAC_PARM.REQ message is to facilitate the proper functioning of the communication interface as per the ISO 15118 standards. ISO 15118 is an international standard that defines the communication protocol between electric vehicles and charging stations, ensuring interoperability and efficient energy transfer.

### Importance

By initiating the communication sequence, the CM_SLAC_PARM.REQ message ensures that all required parameters are correctly requested and exchanged. This step is essential for the seamless operation of the EV charging process, enabling th