In [None]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.

In [1]:
import os

import pandas as pd
import tiktoken

from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager
from graphrag.query.indexer_adapters import (
    read_indexer_communities,
    read_indexer_entities,
    read_indexer_reports,
)
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

## Global Search example

The global search method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but it often gives good responses to questions that require an understanding of the dataset as a whole (e.g., What are the most significant values of the herbs mentioned in this notebook?).

### LLM setup

In [None]:


# Azure OpenAI connection settings are read from the environment so that no
# credentials or endpoints are stored in the notebook.
# NOTE(review): the environment variable names and the "gpt-4o" default are
# assumptions reconstructed from the previously commented-out arguments --
# adjust them to match your deployment.
api_key = os.environ["GRAPHRAG_API_KEY"]
api_base = os.environ["GRAPHRAG_API_BASE"]
llm_model = os.environ.get("GRAPHRAG_LLM_MODEL", "gpt-4o")

config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.AzureOpenAIChat,
    model=llm_model,
    deployment_name=llm_model,  # presumes the Azure deployment is named after the model
    max_retries=20,
    api_base=api_base,
    api_version="2024-12-01-preview",
)
model = ModelManager().get_or_create_chat_model(
    name="global_search",
    model_type=ModelType.AzureOpenAIChat,
    config=config,
)

# `llm_model` must be defined above: the tokenizer is selected by model name.
token_encoder = tiktoken.encoding_for_model(llm_model)

### Load community reports as context for global search

- Load all community reports in the `community_reports` table from GraphRAG, to be used as context data for global search.
- Load entities from the `entities` table from GraphRAG, to be used for calculating community weights for context ranking. Note that this is optional (if no entities are provided, we will not calculate community weights and will only use the rank attribute in the community reports table for context ranking).
- Load all communities in the `communities` table from GraphRAG, to be used to reconstruct the community graph hierarchy for dynamic community selection.

In [3]:
# Location of the GraphRAG project data. Set the GRAPHRAG_INPUT_DIR environment
# variable to run this notebook on another machine; the fallback is the
# original author's local path.
# NOTE(review): nothing visible in this notebook reads INPUT_DIR, and the
# parquet tables are loaded from an "output" folder rather than "input" --
# confirm whether this constant is still needed.
INPUT_DIR = os.environ.get(
    "GRAPHRAG_INPUT_DIR", r"C:\Users\v-chandanap\Desktop\GraphRAGAPI\input"
)

# Names of the tables produced by the indexing pipeline.
COMMUNITY_REPORT_TABLE = "community_reports"
ENTITY_TABLE = "entities"
COMMUNITY_TABLE = "communities"

# Community level passed to the report and entity readers.
COMMUNITY_LEVEL = 2

In [4]:
# Folder holding the parquet tables written by the indexing pipeline.
# Set GRAPHRAG_OUTPUT_DIR to point at your own index; the fallback is the
# location the notebook was originally run against.
OUTPUT_DIR = os.environ.get(
    "GRAPHRAG_OUTPUT_DIR", r"C:\Users\v-chandanap\Desktop\GraphRAGAPI\output"
)

# File names are derived from the table-name constants so that the directory
# and the table names are each defined in exactly one place.
entity_df = pd.read_parquet(os.path.join(OUTPUT_DIR, f"{ENTITY_TABLE}.parquet"))
community_df = pd.read_parquet(os.path.join(OUTPUT_DIR, f"{COMMUNITY_TABLE}.parquet"))
report_df = pd.read_parquet(
    os.path.join(OUTPUT_DIR, f"{COMMUNITY_REPORT_TABLE}.parquet")
)

# Convert the raw tables into the objects consumed by the context builder.
communities = read_indexer_communities(community_df, report_df)
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)

print(f"Total report count: {len(report_df)}")
print(
    f"Report count after filtering by community level {COMMUNITY_LEVEL}: {len(reports)}"
)

# Last expression of the cell: rendered as a preview table.
report_df.head()

Total report count: 12
Report count after filtering by community level 2: 12


Unnamed: 0,id,human_readable_id,community,level,parent,children,title,summary,full_content,rank,rating_explanation,findings,full_content_json,period,size
0,1e420d26dd8449c893d51d7b3bf2ba43,6,6,1,3,[],Affine Employee Leave Policies,"The community centers around Affine, a company...",# Affine Employee Leave Policies\n\nThe commun...,6.5,The impact severity rating is moderate due to ...,[{'explanation': 'Affine offers a wide range o...,"{\n ""title"": ""Affine Employee Leave Policie...",2025-07-28,7
1,2ec6587c87d8415691eacd54494887fd,7,7,1,3,[],Affine's Long Leave Policy and Sabbatical Stru...,The community is centered around Affine's Long...,# Affine's Long Leave Policy and Sabbatical St...,4.5,The impact severity rating is moderate due to ...,[{'explanation': 'Affine's Long Leave Policy i...,"{\n ""title"": ""Affine's Long Leave Policy an...",2025-07-28,3
2,41fb1bcc24934e30ac1487a33a795fc5,8,8,1,3,[],Affine Leave Management and Policy Structure,The community centers around Affine's leave ma...,# Affine Leave Management and Policy Structure...,7.5,The impact severity rating is high due to the ...,[{'explanation': 'The Caregiver Leave policy a...,"{\n ""title"": ""Affine Leave Management and P...",2025-07-28,5
3,21722383bebc4f838831cc21162e63a7,9,9,1,5,[],Affine Inc. and Employee Leave Policies,"Affine Inc., located in Bellevue, Washington, ...",# Affine Inc. and Employee Leave Policies\n\nA...,6.5,The impact severity rating is moderate due to ...,[{'explanation': 'Affine Inc. offers a wide ra...,"{\n ""title"": ""Affine Inc. and Employee Leav...",2025-07-28,10
4,4ad6ff858ac542bda74f4454732fbe48,10,10,1,5,[],Affine Inc. Leave Policies and Employee Manage...,The community centers around Affine Inc.'s lea...,# Affine Inc. Leave Policies and Employee Mana...,6.5,The impact severity rating is moderate to high...,[{'explanation': 'Affine Inc. structures its l...,"{\n ""title"": ""Affine Inc. Leave Policies an...",2025-07-28,3


#### Build global context based on community reports

In [5]:
# Gather the context builder's inputs in one mapping, then construct it.
context_builder_inputs = {
    "community_reports": reports,
    "communities": communities,
    # Leave "entities" out (the parameter then falls back to None) if community
    # weights should not be used for ranking.
    "entities": entities,
    "token_encoder": token_encoder,
}
context_builder = GlobalCommunityContext(**context_builder_inputs)

#### Perform global search

In [6]:
# Options forwarded to the context builder when assembling the report context.
context_builder_params = dict(
    # False: feed full community reports; True: feed the short summaries.
    use_community_summary=False,
    shuffle_data=True,
    include_community_rank=True,
    min_community_rank=0,
    community_rank_name="rank",
    include_community_weight=True,
    community_weight_name="occurrence weight",
    normalize_community_weight=True,
    # Tune to the model's token limit (about 5000 suits an 8k-limit model).
    max_tokens=12_000,
    context_name="Reports",
)

# LLM settings for the map step, which requests JSON-formatted output.
map_llm_params = dict(
    max_tokens=1000,
    temperature=0.0,
    response_format=dict(type="json_object"),
)

# LLM settings for the reduce step.
reduce_llm_params = dict(
    # Tune to the model's token limit (1000-1500 suits an 8k-limit model).
    max_tokens=2000,
    temperature=0.0,
)

In [7]:
# Assemble the global search engine from the model, the context builder and
# the parameter dictionaries defined in the previous cell.
search_engine = GlobalSearch(
    # Core components.
    model=model,
    token_encoder=token_encoder,
    context_builder=context_builder,
    context_builder_params=context_builder_params,
    # Map / reduce LLM settings.
    map_llm_params=map_llm_params,
    reduce_llm_params=reduce_llm_params,
    # Disable JSON mode if the LLM does not support it.
    json_mode=True,
    # Token budget; tune to the model's limit (about 5000 suits an 8k-limit model).
    max_data_tokens=12_000,
    concurrent_coroutines=32,
    # True adds an instruction encouraging the LLM to draw on general knowledge;
    # useful in some cases but may increase hallucinations.
    allow_general_knowledge=False,
    # Free-form description of the desired answer shape, e.g. "prioritized list",
    # "single paragraph", "multiple paragraphs", "multiple-page report".
    response_type="multiple paragraphs",
)

In [10]:
result = await search_engine.search("What are the different types of leaves offered at Affine and their respective eligibility criteria?")

print(result.response)

### Overview of Leave Policies at Affine

Affine offers a comprehensive range of leave options designed to address diverse employee needs and promote a healthy work-life balance. These policies are structured around the fiscal year, which runs from April 1st to March 31st, ensuring consistency in leave calculations and grants [Data: Reports (3, 5, 6, 9, 10, +more)].

Below is a detailed breakdown of the types of leaves offered and their respective eligibility criteria:

---

### **1. Earned Leaves**
- **Eligibility**: Employees accrue earned leaves on a quarterly basis, with a maximum accumulation of 20 days per year. 
- **Usage**: These leaves must be utilized within the fiscal year to prevent accumulation beyond the annual limit [Data: Reports (0, 10)].

---

### **2. Sick and Casual Leaves**
- **Purpose**: These leaves are intended for managing short-term health issues or personal matters.
- **Eligibility**: Available to all employees as part of Affine's commitment to employee well-

In [11]:
# Show the "reports" table that was supplied to the LLM as context.
reports_context = result.context_data["reports"]
reports_context

Unnamed: 0,id,title,occurrence weight,content,rank
0,3,Affine and Its Comprehensive Leave Policies,1.0,# Affine and Its Comprehensive Leave Policies\...,7.5
1,5,Affine Inc. and Its Comprehensive Leave Policies,0.9,# Affine Inc. and Its Comprehensive Leave Poli...,7.5
2,2,Affine Leave Management and HR Structure,0.85,# Affine Leave Management and HR Structure\n\n...,7.5
3,6,Affine Employee Leave Policies,0.85,# Affine Employee Leave Policies\n\nThe commun...,6.5
4,9,Affine Inc. and Employee Leave Policies,0.75,# Affine Inc. and Employee Leave Policies\n\nA...,6.5
5,0,Affine Analytics and Global Operations,0.6,# Affine Analytics and Global Operations\n\nTh...,7.5
6,8,Affine Leave Management and Policy Structure,0.45,# Affine Leave Management and Policy Structure...,7.5
7,4,Affine Inc. and Affine Analytics Pvt. Ltd. Lea...,0.35,# Affine Inc. and Affine Analytics Pvt. Ltd. L...,7.5
8,10,Affine Inc. Leave Policies and Employee Manage...,0.3,# Affine Inc. Leave Policies and Employee Mana...,6.5
9,1,Affine Project Management and Delivery Coordin...,0.1,# Affine Project Management and Delivery Coord...,6.5


In [13]:
# Print the full text of the context report stored under index label 0.
context_reports = result.context_data["reports"]
print(context_reports.at[0, "content"])

# Affine and Its Comprehensive Leave Policies

The community centers around Affine, a company dedicated to employee well-being through its comprehensive leave policies. Key entities include various leave types such as Caregiver Leave, Maternity/Paternity Leave, and COVID-related leave, all supported by Affine's HR team and management structure. The relationships between these entities highlight Affine's commitment to fostering a supportive work environment, with a focus on maintaining a healthy work-life balance for its employees.

## Affine's Commitment to Employee Well-being

Affine is deeply committed to the well-being of its employees, as evidenced by its comprehensive leave policies. These policies are designed to accommodate the diverse needs of its workforce, offering a variety of leave options such as Earned Leaves, Sick and Casual Leaves, and Public Holidays. The company's structured approach ensures that employees have the necessary time off to recharge and manage personal co

In [11]:
result = await search_engine.search("What is the eligibility for sabbatical leave?")

print(result.response)

# inspect the data used to build the context for the LLM responses
result.context_data["reports"]

# inspect number of LLM calls and tokens
print(
    f"LLM calls: {result.llm_calls}. Prompt tokens: {result.prompt_tokens}. Output tokens: {result.output_tokens}."
)

I am sorry but I am unable to answer this question given the provided data.
LLM calls: 1. Prompt tokens: 1670. Output tokens: 46.
