# Imports and load data

In [6]:
from dotenv import load_dotenv
from rag_utils import setup_rag_embeddings, build_rag_chain
from ragas import SingleTurnSample, EvaluationDataset, evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import Faithfulness, FactualCorrectness
from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
from langchain_openai import OpenAIEmbeddings

import asyncio
import pandas as pd
import os
import json

# --- Configuration ---------------------------------------------------------
# Load variables from the .env file; override=True lets them replace values
# that are already present in the process environment.
load_dotenv(override=True)

# Locations: DATA_DIR is handed to the retriever setup, DATASET is the JSON
# file holding the synthetic questions and ground truths.
DATA_DIR = "test/"
DATASET = "synthetic_data_big_context_test_citet.json"

# Chat model reached through the gen-ai-hub proxy client.
model_name = "gpt-4o"
proxy_client = get_proxy_client("gen-ai-hub")
llm_sap = ChatOpenAI(
    proxy_client=proxy_client,
    proxy_model_name=model_name,
)

# Embeddings are created with langchain_openai directly (no proxy client).
# Disabled proxy-based variant, kept for reference:
# embeddings_model = OpenAIEmbeddings(proxy_model_name='text-embedding-ada-002', proxy_client=proxy_client)
embeddings_model_own = OpenAIEmbeddings(
    model="text-embedding-3-small",
)



In [None]:
# One-off helper: converts every PDF report in DATA_DIR to a markdown file next to it.
# Only needed the first time the reports are parsed. The code lives inside a bare
# string literal so it is not executed with the notebook; remove the triple quotes to run it.
# NOTE(review): LlamaParse is not imported in the visible cells — an import has to be
# added before enabling this block.
"""
for file in os.listdir(DATA_DIR):
    if file.endswith(".pdf"):
        try:
            print(f"Converting {file} to markdown")
            md_text = LlamaParse(
                result_type="markdown", 
                verbose=True,
                #use_vendor_multimodal_model=True,
                #vendor_multimodal_model_name="openai-gpt-4o-mini",
                #vendor_multimodal_api_key=os.getenv("OPENAI_API_KEY"),
                language="en",
                numWorkers=5).load_data(DATA_DIR + file)
            combined_md_text = "\n\n".join([doc.text for doc in md_text])
            md_file_path = DATA_DIR + file.replace(".pdf", ".md")
            print(f"Saving markdown to {md_file_path}")
            with open(md_file_path, "w", encoding="utf-8") as f:
                f.write(combined_md_text)
            print(f"Successfully converted {file}")
        except Exception as e:
            print(f"Error converting {file}: {e}")
"""

# RAG

In [26]:
# Build the retriever over DATA_DIR (FAISS index kept under "faiss_big_context"),
# then combine it with the proxy-hosted chat model into the RAG chain.
retriever = setup_rag_embeddings(
    data_dir=DATA_DIR,
    faiss_path="faiss_big_context",
    embeddings_model=embeddings_model_own,
)
rag_chain = build_rag_chain(
    retriever=retriever,
    llm=llm_sap,
)

Loading existing FAISS index from faiss_big_context...




prompt input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


Map generated questions and ground_truths into evaluation dataset


In [27]:

# Load the synthetic question / ground-truth set and answer every question with the RAG chain.

# JSON files are UTF-8; without an explicit encoding open() uses the platform locale
# (e.g. cp1252 on Windows), which can fail on or garble non-ASCII characters.
with open(DATASET, 'r', encoding='utf-8') as f:
    synthetic_data = json.load(f)

# The file is either a bare list of items or wraps that list as {'responses': [...]}.
if isinstance(synthetic_data, dict) and 'responses' in synthetic_data:
    synthetic_data = synthetic_data['responses']

# Missing keys fall back to an empty string instead of raising.
queries = [item.get('question', "") for item in synthetic_data]
ground_truths = [item.get('ground_truth', "") for item in synthetic_data]
contexts = [item.get('context', "") for item in synthetic_data]  # not used further in this cell

answers = []
retrieved_contexts = []

for query in queries:
    answer = rag_chain.invoke(query)
    answers.append(answer)
    print("Query: ",query)
    print("Answer: ",answer)
    # The retriever is invoked separately to record the passages for the evaluation samples.
    # NOTE(review): assumes this returns the same documents the chain used internally — confirm.
    retrieved_context = [doc.page_content for doc in retriever.invoke(query)]
    retrieved_contexts.append(retrieved_context)
    print("Retrieved context:",retrieved_context)


Query:  How can SAP leverage its human resource management practices to enhance corporate sustainability and simultaneously boost financial performance in a regional context like Podlaskie Voivodeship?
Anwer:  SAP can leverage its human resource management practices to enhance corporate sustainability in Podlaskie Voivodeship by implementing sustainable HRM strategies that integrate economic, ecological, and social goals, thus balancing inter-generational needs. This approach can help improve employee recruitment and retention while aligning with regional sustainability priorities, ultimately boosting financial performance. Additionally, focusing on local ecological and ethnic diversity can further tailor these practices to the regional context.
Retrieved context: ['Sustainable HRM in Polish companies. Additionally, there is a lack of research contribution on the Podlasie region—ecologically and ethnically the most diverse region in the country. While sustainability in environmental te

In [28]:
# Assemble one ragas sample per question (question, RAG answer, retrieved passages,
# reference answer) and wrap the samples in an EvaluationDataset.
evaluation_samples = [
    SingleTurnSample(
        user_input=question,
        response=rag_answer,
        reference=expected,
        retrieved_contexts=passages,
    )
    for question, rag_answer, passages, expected in zip(
        queries, answers, retrieved_contexts, ground_truths
    )
]

evaluation_dataset = EvaluationDataset(samples=evaluation_samples)



In [None]:
# Inspection only: show the samples, the container type, then the type and
# attribute names of the first sample.
_samples = evaluation_dataset.samples
print(_samples)
print(type(_samples))
_first_sample = _samples[0]
print(type(_first_sample))
print(dir(_first_sample))

[SingleTurnSample(user_input='How can SAP leverage its human resource management practices to enhance corporate sustainability and simultaneously boost financial performance in a regional context like Podlaskie Voivodeship?', retrieved_contexts=['Sustainable HRM in Polish companies. Additionally, there is a lack of research contribution on the Podlasie region—ecologically and ethnically the most diverse region in the country. While sustainability in environmental terms is evident in the region, the article focuses on sustainability implementation in the business sector. The article helps to close the research gap. Its aim is to examine', '# Keywords\n\nSustainable HRM; corporate sustainability; manager’s opinion; Podlasie enterprises\n\n# 1. Introduction', 'In Poland, the definition of Sustainable HRM was presented by A. Pabian, who claims “the essence of this concept is planning and recruiting employees and affecting them in such a way that they achieve economic, ecological and social

In [30]:

# Judge model for ragas. The embeddings wrapper is created as well, although
# none of the metrics below receive it.
evaluator_llm = LangchainLLMWrapper(llm_sap)
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# FactualCorrectness is scored once per mode, each under its own result column,
# followed by Faithfulness.
_factual_correctness_columns = {
    "precision": "FactualCorrectness_Precision",
    "recall": "FactualCorrectness_Recall",
    "f1": "FactualCorrectness_F1",
}
metrics = [
    FactualCorrectness(llm=evaluator_llm, mode=fc_mode, name=fc_name)
    for fc_mode, fc_name in _factual_correctness_columns.items()
]
metrics.append(Faithfulness(llm=evaluator_llm))

results = evaluate(dataset=evaluation_dataset, metrics=metrics)
df_rag = results.to_pandas()

# Disabled experiment with a combined metric, kept for reference:
# FactualCorrectnessAllMetrics(llm=evaluator_llm, name="FactualCorrectness_All")
# factual_correctness = df['FactualCorrectness_All'].apply(pd.Series)
# df = pd.concat([df.drop(['FactualCorrectness_All'], axis=1), factual_correctness], axis=1)
# print(df[['FactualCorrectness_Precision', 'FactualCorrectness_Recall', 'FactualCorrectness_F1', 'Faithfulness']])



Evaluating: 100%|██████████| 20/20 [00:38<00:00,  1.93s/it]


# Results

In [34]:
# Widen text columns so questions and answers stay readable
# (pandas default is 50; None means unlimited).
pd.options.display.max_colwidth = 200

# Metric glossary (context recall and semantic similarity are listed for reference;
# they are not among the metrics evaluated above):
# - context recall: how many of the relevant documents (or pieces of information) were retrieved
# - factual correctness (precision): proportion of claims in the response that are also
#   found in the reference, i.e. how many of the claims made are correct
# - factual correctness (recall): proportion of facts in the reference that are also
#   present in the response, i.e. how many of the reference facts are found
# - faithfulness: whether the claims made in the response are supported by the retrieved context
# - semantic similarity: how similar the response is to the ground truth
df_rag
 


Unnamed: 0,user_input,retrieved_contexts,response,reference,FactualCorrectness_Precision,FactualCorrectness_Recall,FactualCorrectness_F1,faithfulness
0,How can SAP leverage its human resource management practices to enhance corporate sustainability and simultaneously boost financial performance in a regional context like Podlaskie Voivodeship?,"[Sustainable HRM in Polish companies. Additionally, there is a lack of research contribution on the Podlasie region—ecologically and ethnically the most diverse region in the country. While sustai...","SAP can leverage its human resource management practices to enhance corporate sustainability in Podlaskie Voivodeship by implementing sustainable HRM strategies that integrate economic, ecological...","SAP can leverage its HRM practices by implementing a Sustainable HRM framework that focuses on ecological, social, and strategic aspects. By promoting pro-environmental behaviors, enhancing divers...",0.57,0.33,0.53,0.0
1,What role can SAP play in developing green product psychological ownership among its employees to reduce green confusion and enhance environmental performance?,"[# Keywords\n\ngreen shared vision; psychological ownership theory; green product psychological ownership; employee green confusion; employee environmental performance\n\n# 1. Introduction, produc...",SAP can play a role in developing green product psychological ownership among its employees by formulating a clear green shared vision and obtaining employee recognition to encourage environmental...,SAP can play a pivotal role in developing green product psychological ownership by fostering a shared green vision and providing training that emphasizes the importance of sustainability in produc...,0.67,0.5,0.57,0.375
2,"In what ways can SAP's corporate sustainability practices be tailored to align with the financial perceptions and expectations of Chinese stakeholders, especially in relation to the GRI Sustainabi...","[In China, the GRI (2013) guidelines are used as the basis of most sustainability reportings. In recent decades, GRI has attempted to provide comprehensive guidelines for CSD by setting indicators...","To align SAP's corporate sustainability practices with Chinese stakeholders' financial perceptions, SAP should consider adapting the GRI Sustainability Reporting Guidelines to reflect Chinese-spec...","To align SAP's corporate sustainability practices with Chinese stakeholders' financial perceptions, SAP should focus on enhancing the environmental and social aspects of its sustainability reports...",0.8,0.5,0.5,0.25
3,How can SAP address the challenges of high ownership concentration impacting corporate sustainability performance in emerging markets like Brazil?,[The present work offers insights on the determinants of firm’s social and sustainability policies in an important emerging market (Brazil). The research builds on stakeholder and agency theories ...,SAP can address the challenges of high ownership concentration impacting corporate sustainability performance in emerging markets like Brazil by encouraging diversified ownership structures. This ...,"In emerging markets like Brazil, high ownership concentration can negatively impact corporate sustainability performance by prioritizing short-term gains over long-term sustainability goals. SAP c...",0.4,0.25,0.31,0.625
4,"What strategies can SAP adopt to ensure that its sustainability initiatives lead to tangible financial benefits, particularly in highly regulated sectors like banking in Bangladesh?","[performance [18]. Despite some research on sustainability in Bangladeshi banks, the question remains whether regulatory sustainability guidelines improve both the environmental and financial perf...","SAP can ensure its sustainability initiatives lead to financial benefits by aligning with regulatory guidelines that promote sustainability, such as the ERM guidelines in Bangladesh, which can lea...",SAP can adopt strategies such as integrating sustainability criteria into financial decision-making processes and developing products that meet regulatory requirements while promoting environmenta...,0.12,0.17,0.24,0.1


In [36]:
# Mean of the three FactualCorrectness columns over all questions, rounded to 4 decimals
# (the faithfulness column is not part of this summary).
_factual_correctness_cols = [
    'FactualCorrectness_Precision',
    'FactualCorrectness_Recall',
    'FactualCorrectness_F1',
]
average_metrics = df_rag[_factual_correctness_cols].mean().round(4)

print("Average for each metric:", average_metrics, sep="\n")

Average for each metric:
FactualCorrectness_Precision    0.512
FactualCorrectness_Recall       0.350
FactualCorrectness_F1           0.430
dtype: float64


# GraphRAG

## Indexing

In [None]:
# graphrag initial setup
!mkdir ./graphrag_big_context
!mkdir ./input_big_context
!python -m graphrag init --root ./graphrag_big_context

The syntax of the command is incorrect.


⠋ GraphRAG Indexer 
Initializing project at 
E:\Repositories\graphrag-businessqa-evaluation\graphrag_big_context
⠋ GraphRAG Indexer 


In [4]:
# Build the GraphRAG index for the project rooted at ./graphrag_big_context.
# Only run this once unless you want to update the index.
!python -m graphrag index --root ./graphrag_big_context



⠋ GraphRAG Indexer 
Logging enabled at 
E:\Repositories\graphrag-businessqa-evaluation\graphrag_big_context\logs\indexi
ng-engine.log
⠋ GraphRAG Indexer 
⠋ GraphRAG Indexer 
⠙ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
⠙ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
└── create_base_text_units
⠹ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
└── create_base_text_units
⠴ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
└── create_base_text_units
⠧ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
└── create_base_text_units
⠋ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
└── create_base_text_units
⠙ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) -

[2024-12-11T00:11:00Z WARN  lance::dataset] No existing dataset at E:\Repositories\graphrag-businessqa-evaluation\graphrag_big_context\output\lancedb\default-community-full_content.lance, it will be created
[2024-12-11T00:11:03Z WARN  lance::dataset] No existing dataset at E:\Repositories\graphrag-businessqa-evaluation\graphrag_big_context\output\lancedb\default-text_unit-text.lance, it will be created
[2024-12-11T00:11:07Z WARN  lance::dataset] No existing dataset at E:\Repositories\graphrag-businessqa-evaluation\graphrag_big_context\output\lancedb\default-entity-description.lance, it will be created


└── generate_text_embeddings
⠋ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
├── create_base_text_units
├── create_final_documents
├── create_base_entity_graph
├── create_final_entities
├── create_final_nodes
├── create_final_communities
├── create_final_relationships
├── create_final_text_units
├── create_final_community_reports
└── generate_text_embeddings
⠹ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
├── create_base_text_units
├── create_final_documents
├── create_base_entity_graph
├── create_final_entities
├── create_final_nodes
├── create_final_communities
├── create_final_relationships
├── create_final_text_units
├── create_final_community_reports
└── generate_text_embeddings
⠸ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 6 files loaded (0 filtered) - 100%  0…
├── create_base_text_units
├── create_final_documents
├── create_base_entity_graph
├── create_final_ent

## Query Engine

### Global Search

Follows the implementation guide from the GraphRAG docs: https://microsoft.github.io/graphrag/examples_notebooks/global_search/

In [None]:
from graphrag_utils import setup_graphrag
import pandas as pd
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
import os
import json
import asyncio

api_key = os.environ["GRAPHRAG_API_KEY"]


llm = ChatOpenAI(
    api_key=api_key,
    model="gpt-4o-mini",
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)
community_level = 2

with open(DATASET, 'r') as file:
    data = json.load(file)
    
queries = [response['question'] for response in data['responses']]
references = [response['ground_truth'] for response in data['responses']]

async def perform_global_search(query):
    print(f"Performing search with query: {query}")
    result = await search_engine_global.asearch(query)
    print(f"Result for query: {query} is: {result.response}")
    return result.response

tasks = [perform_global_search(query) for query in queries]
results = await asyncio.gather(*tasks)

evaluation_samples = []
for query, result, reference in zip(queries, results, references):
    sample = SingleTurnSample(
        user_input=query,
        response=result,
        reference=reference
    )
    evaluation_samples.append(sample)

evaluation_dataset = EvaluationDataset(samples=evaluation_samples)
#print(evaluation_dataset)

Performing search with query: How can SAP leverage its human resource management practices to enhance corporate sustainability and simultaneously boost financial performance in a regional context like Podlaskie Voivodeship?
Performing search with query: What role can SAP play in developing green product psychological ownership among its employees to reduce green confusion and enhance environmental performance?
Performing search with query: In what ways can SAP's corporate sustainability practices be tailored to align with the financial perceptions and expectations of Chinese stakeholders, especially in relation to the GRI Sustainability Reporting Guidelines?
Performing search with query: How can SAP address the challenges of high ownership concentration impacting corporate sustainability performance in emerging markets like Brazil?
Performing search with query: What strategies can SAP adopt to ensure that its sustainability initiatives lead to tangible financial benefits, particularly 

In [None]:
# Print the dataset summary to check its features and the number of samples.
print(evaluation_dataset)


EvaluationDataset(features=['user_input', 'response', 'reference'], len=5)


In [None]:
# Score the GraphRAG answers with the proxy-hosted judge LLM.
evaluator_llm = LangchainLLMWrapper(llm_sap)
# Embeddings wrapper is switched off for this run:
# evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# One FactualCorrectness metric per mode, each reported under its own column name.
_factual_correctness_modes = (
    ("precision", "FactualCorrectness_Precision"),
    ("recall", "FactualCorrectness_Recall"),
    ("f1", "FactualCorrectness_F1"),
)
metrics = [
    FactualCorrectness(llm=evaluator_llm, mode=fc_mode, name=fc_name)
    for fc_mode, fc_name in _factual_correctness_modes
]
# Faithfulness(llm=evaluator_llm) is switched off here; note that the GraphRAG
# samples carry no retrieved_contexts.

results = evaluate(dataset=evaluation_dataset, metrics=metrics)
df_grag = results.to_pandas()



Evaluating: 100%|██████████| 15/15 [01:27<00:00,  5.84s/it]


In [17]:
# Show up to 250 characters per cell (pandas default is 50; None means unlimited).
pd.options.display.max_colwidth = 250
df_grag

Unnamed: 0,user_input,response,reference,FactualCorrectness_Precision,FactualCorrectness_Recall,FactualCorrectness_F1
0,How can SAP leverage its human resource management practices to enhance corporate sustainability and simultaneously boost financial performance in a regional context like Podlaskie Voivodeship?,## Enhancing Corporate Sustainability through HRM Practices\n\nSAP can significantly enhance its corporate sustainability and financial performance in the Podlaskie Voivodeship by integrating sustainable human resource management (HRM) practices ...,"SAP can leverage its HRM practices by implementing a Sustainable HRM framework that focuses on ecological, social, and strategic aspects. By promoting pro-environmental behaviors, enhancing diversity management, and supporting work-life balance, ...",0.19,0.62,0.29
1,What role can SAP play in developing green product psychological ownership among its employees to reduce green confusion and enhance environmental performance?,"## Enhancing Green Product Psychological Ownership at SAP\n\nSAP can play a pivotal role in developing green product psychological ownership among its employees, which is essential for reducing green confusion and enhancing overall environmental ...","SAP can play a pivotal role in developing green product psychological ownership by fostering a shared green vision and providing training that emphasizes the importance of sustainability in product design and functionality. By doing so, SAP can r...",0.13,0.73,0.35
2,"In what ways can SAP's corporate sustainability practices be tailored to align with the financial perceptions and expectations of Chinese stakeholders, especially in relation to the GRI Sustainability Reporting Guidelines?","## Tailoring SAP's Corporate Sustainability Practices for Chinese Stakeholders\n\nTo effectively align SAP's corporate sustainability practices with the financial perceptions and expectations of Chinese stakeholders, particularly in relation to t...","To align SAP's corporate sustainability practices with Chinese stakeholders' financial perceptions, SAP should focus on enhancing the environmental and social aspects of its sustainability reports, as these are valued more by Chinese stakeholders...",0.62,0.79,0.7
3,How can SAP address the challenges of high ownership concentration impacting corporate sustainability performance in emerging markets like Brazil?,"## Addressing High Ownership Concentration in Corporate Sustainability\n\nHigh ownership concentration in emerging markets, particularly in Brazil, presents significant challenges for corporate sustainability performance. Companies with concentra...","In emerging markets like Brazil, high ownership concentration can negatively impact corporate sustainability performance by prioritizing short-term gains over long-term sustainability goals. SAP can address these challenges by promoting inclusive...",0.37,0.88,0.53
4,"What strategies can SAP adopt to ensure that its sustainability initiatives lead to tangible financial benefits, particularly in highly regulated sectors like banking in Bangladesh?","## Strategies for SAP to Enhance Financial Benefits from Sustainability Initiatives\n\nSAP can adopt several strategic approaches to ensure that its sustainability initiatives yield tangible financial benefits, especially in the highly regulated ...",SAP can adopt strategies such as integrating sustainability criteria into financial decision-making processes and developing products that meet regulatory requirements while promoting environmental stewardship. In the heavily regulated banking se...,0.35,0.45,0.29


In [33]:
# Side-by-side summary: mean FactualCorrectness scores (rounded to 4 decimals)
# for the GraphRAG run and the plain RAG run.
_factual_correctness_cols = [
    'FactualCorrectness_Precision',
    'FactualCorrectness_Recall',
    'FactualCorrectness_F1',
]
average_metrics_grag = df_grag[_factual_correctness_cols].mean().round(4)
average_metrics_rag = df_rag[_factual_correctness_cols].mean().round(4)

for _heading, _averages in (
    ("Average for each metric (GraphRAG):", average_metrics_grag),
    ("Average for each metric (RAG):", average_metrics_rag),
):
    print(_heading)
    print(_averages)


Average for each metric (GraphRAG):
FactualCorrectness_Precision    0.332
FactualCorrectness_Recall       0.694
FactualCorrectness_F1           0.432
dtype: float64
Average for each metric (RAG):
FactualCorrectness_Precision    0.512
FactualCorrectness_Recall       0.350
FactualCorrectness_F1           0.430
dtype: float64
