# Imports and load data

In [5]:
from dotenv import load_dotenv
from rag_utils import setup_rag_embeddings, build_rag_chain
from ragas import SingleTurnSample, EvaluationDataset, evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import Faithfulness, FactualCorrectness
from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
from langchain_openai import OpenAIEmbeddings

import asyncio
import pandas as pd
import os
import json

# Load variables from a local .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Shared configuration consumed by the cells below.
model_name = "gpt-4o"  # proxy model name for the chat LLM; also handed to setup_graphrag later on
# Keep the trailing slash: the (disabled) conversion cell builds paths as DATA_DIR + file.
DATA_DIR = "citet_papers/"
DATASET = "evaluation_references_test.json"  # JSON file holding the questions and ground truths
proxy_client = get_proxy_client('gen-ai-hub')
# Chat model reached through the proxy client; used for RAG generation and as the Ragas evaluator LLM.
llm_sap = ChatOpenAI(proxy_model_name=model_name, proxy_client=proxy_client)
#embeddings_model = OpenAIEmbeddings(proxy_model_name='text-embedding-ada-002', proxy_client=proxy_client)
# Embeddings are created without the proxy client.
# NOTE(review): presumably authenticated via OPENAI_API_KEY from the environment - confirm.
embeddings_model_own = OpenAIEmbeddings(model="text-embedding-3-small")



In [None]:
# One-off conversion of the PDF papers in DATA_DIR to markdown files next to them.
# Only needed the first time the reports are parsed.
# The code is wrapped in a string literal so it does not execute when the cell runs.
# NOTE(review): LlamaParse is not imported in the notebook's import cell; add the
# import before re-enabling this code.
"""
for file in os.listdir(DATA_DIR):
    if file.endswith(".pdf"):
        try:
            print(f"Converting {file} to markdown")
            md_text = LlamaParse(
                result_type="markdown", 
                verbose=True,
                #use_vendor_multimodal_model=True,
                #vendor_multimodal_model_name="openai-gpt-4o-mini",
                #vendor_multimodal_api_key=os.getenv("OPENAI_API_KEY"),
                language="en",
                numWorkers=5).load_data(DATA_DIR + file)
            combined_md_text = "\n\n".join([doc.text for doc in md_text])
            md_file_path = DATA_DIR + file.replace(".pdf", ".md")
            print(f"Saving markdown to {md_file_path}")
            with open(md_file_path, "w", encoding="utf-8") as f:
                f.write(combined_md_text)
            print(f"Successfully converted {file}")
        except Exception as e:
            print(f"Error converting {file}: {e}")
"""

# RAG

In [4]:
# Create the retriever over the documents in DATA_DIR (index stored under
# "faiss_references"), then combine it with the chat LLM into the RAG chain.
retriever = setup_rag_embeddings(
    data_dir=DATA_DIR,
    faiss_path="faiss_references",
    embeddings_model=embeddings_model_own,
)
rag_chain = build_rag_chain(
    retriever=retriever,
    llm=llm_sap,
)

Embedding documents...
amount of documents used:  7
Saved 2071 chunks to faiss_references.




prompt input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


Map the generated questions and ground truths into an evaluation dataset.


In [6]:

# Load the evaluation set. The encoding is pinned to UTF-8 because relying on the
# platform default (e.g. cp1252 on Windows) garbles non-ASCII characters: an
# accented letter in an author name in the ground truths shows up as two junk
# characters in the result tables.
with open(DATASET, 'r', encoding='utf-8') as f:
    synthetic_data = json.load(f)

# The file is either a bare list of items or a dict wrapping them under "responses".
if isinstance(synthetic_data, dict) and 'responses' in synthetic_data:
    synthetic_data = synthetic_data['responses']

# Parallel lists, one entry per dataset item; missing keys fall back to "".
queries = [item.get('question', "") for item in synthetic_data]
ground_truths = [item.get('ground_truth', "") for item in synthetic_data]
contexts = [item.get('context', "") for item in synthetic_data]  # not referenced by the cells shown here

answers = []
retrieved_contexts = []

# Answer every question with the RAG chain and record the chunks the retriever
# returns for the same question (needed later as `retrieved_contexts`).
# The retriever is invoked separately from the chain here.
# NOTE(review): presumably returns the same chunks the chain used - confirm.
for query in queries:
    answer = rag_chain.invoke(query)
    answers.append(answer)
    print("Query: ",query)
    print("Anwer: ",answer)
    retrieved_context = [doc.page_content for doc in retriever.invoke(query)]
    retrieved_contexts.append(retrieved_context)
    print("Retrieved context:",retrieved_context)


Query:  How can a company strategically integrate a Corporate Sustainability Management System (CSMS) with a Green Shared Vision and a comprehensive evaluation framework to simultaneously enhance environmental conservation, improve financial performance, and foster stronger stakeholder relationships, while addressing specific challenges such as green confusion, social sustainability, and the impact of external factors like climate vulnerability and pandemics?
Anwer:  A company can strategically integrate a Corporate Sustainability Management System (CSMS) with a Green Shared Vision (GSV) by aligning environmental goals with business objectives, fostering employee engagement through psychological ownership, and using a comprehensive evaluation framework to track progress. This approach enhances environmental conservation, financial performance, and stakeholder relationships by ensuring that sustainability initiatives are transparent, measurable, and aligned with stakeholder expectations

In [7]:
# Build one Ragas sample per question from the parallel result lists, then wrap
# the samples in an EvaluationDataset.
evaluation_samples = [
    SingleTurnSample(
        user_input=question,
        response=rag_answer,
        reference=expected_answer,
        retrieved_contexts=context_chunks,
    )
    for question, rag_answer, context_chunks, expected_answer in zip(
        queries, answers, retrieved_contexts, ground_truths
    )
]

evaluation_dataset = EvaluationDataset(samples=evaluation_samples)



In [8]:
# Quick inspection: the samples themselves, the container type, and the type
# and attribute names of a single sample.
samples = evaluation_dataset.samples
print(samples)
print(type(samples))
first_sample = samples[0]
print(type(first_sample))
print(dir(first_sample))



[SingleTurnSample(user_input='How can a company strategically integrate a Corporate Sustainability Management System (CSMS) with a Green Shared Vision and a comprehensive evaluation framework to simultaneously enhance environmental conservation, improve financial performance, and foster stronger stakeholder relationships, while addressing specific challenges such as green confusion, social sustainability, and the impact of external factors like climate vulnerability and pandemics?', retrieved_contexts=['Corporate sustainability requires managers to simultaneously address widely diverging but interconnected concerns for the natural environment, social welfare, and economic prosperity (Bansal, 2002; Gladwin, Kennelly, & Krause, 1995; Maon, Lindgreen, & Swaen, 2008). As a consequence, corporate decision makers “face a great deal of ambiguity in understanding the issues, the implications of these', '# 1.1. Corporate Sustainability\n\nThe concept of CS is related to the broader concept of s

In [9]:

# The proxy chat model doubles as the judge LLM for the Ragas metrics.
evaluator_llm = LangchainLLMWrapper(llm_sap)
# Built here but not handed to any metric or to evaluate() in this cell.
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# FactualCorrectness is evaluated once per mode; the second element is the
# column name the score gets in the result table.
factual_correctness_variants = (
    ("precision", "FactualCorrectness_Precision"),
    ("recall", "FactualCorrectness_Recall"),
    ("f1", "FactualCorrectness_F1"),
)
metrics = [
    FactualCorrectness(llm=evaluator_llm, mode=mode, name=column_name)
    for mode, column_name in factual_correctness_variants
]
metrics.append(Faithfulness(llm=evaluator_llm))

results = evaluate(dataset=evaluation_dataset, metrics=metrics)
df_rag = results.to_pandas()


Evaluating: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


# Results

In [10]:
# Metric glossary:
# - factual correctness (precision): share of the claims in the response that are also found in the reference
# - factual correctness (recall): share of the facts in the reference that are also present in the response
# - faithfulness: whether the claims made in the response are supported by the retrieved context
# Described for completeness, but not among the metrics evaluated for this table:
# - context recall: how many of the relevant documents (or pieces of information) were retrieved
# - semantic similarity: how similar the response is to the ground truth

# Widen text columns so more of each cell is visible (pandas default is 50, None means unlimited).
pd.options.display.max_colwidth = 200

df_rag
 


Unnamed: 0,user_input,retrieved_contexts,response,reference,FactualCorrectness_Precision,FactualCorrectness_Recall,FactualCorrectness_F1,faithfulness
0,How can a company strategically integrate a Corporate Sustainability Management System (CSMS) with a Green Shared Vision and a comprehensive evaluation framework to simultaneously enhance environm...,"[Corporate sustainability requires managers to simultaneously address widely diverging but interconnected concerns for the natural environment, social welfare, and economic prosperity (Bansal, 200...","A company can strategically integrate a Corporate Sustainability Management System (CSMS) with a Green Shared Vision (GSV) by aligning environmental goals with business objectives, fostering emplo...",Endiana et al. assert that the accounting sector of a company can foster environmental conservation and enhance performance through the implementation of a Corporate Sustainability Management Syst...,0.8,0.42,0.55,0.230769
1,How can multinational corporations effectively integrate sustainable human resource management practices with innovative environmental strategies and diverse team dynamics to enhance their overall...,[Socially responsible human resource management manifests itself in the treatment of employees as important stakeholders of the enterprise [54]. Diversity management will be used to study the soci...,"Multinational corporations can enhance corporate sustainability performance by integrating sustainable human resource management practices that consider economic, social, and ecological aspects, w...","Mazur and Walczynia, in alignment with Chang et al., emphasize the importance of implementing sustainable management of human resources (SMHR) when a company has a robust corporate sustainability ...",0.25,0.07,0.08,0.444444
2,"How can multinational corporations effectively integrate Global Reporting Initiative (GRI) indicators to achieve corporate sustainability, considering the diverse interpretations of sustainability...","[As suggested by Brown et al. (2009), GRI is significant in terms of its “broad range of stakeholders” approach, as well as institutionalising multi-stakeholders on reporting and accountability. H...","Multinational corporations can effectively integrate GRI indicators by adopting a comprehensive approach to sustainability reporting that includes economic, environmental, and social performance, ...","In the context of growing global concern for companies to align their corporate sustainability goals with international standards, it is proposed by Zhang et al. that firms utilize indicators from...",0.0,0.0,0.18,0.5
3,"How can firms globally enhance their corporate sustainability efforts by integrating comprehensive criteria across various business domains, such as governance, social responsibility, and supply c...",[These criteria provide a more dynamic assessment of the interrelated social dimensions that have in the past been looked at individually and as environmental impacts of firms. Due to the COVID-19...,"Firms can enhance their corporate sustainability efforts by adopting an integrative framework that includes criteria such as governance, social responsibility, and supply chain management while re...","Ikram et al. emphasize the necessity for more categories to accurately assess specific aspects of firms, particularly focusing on social sustainability, by proposing nine distinct categories: Corp...",0.71,0.45,0.53,0.583333
4,"How can corporations integrate diverse team dynamics, strategic management, and key financial determinants to create a holistic and effective sustainability strategy that enhances value for busine...","[Corporate sustainability requires managers to simultaneously address widely diverging but interconnected concerns for the natural environment, social welfare, and economic prosperity (Bansal, 200...","Corporations can integrate diverse team dynamics, strategic management, and financial determinants into a sustainability strategy by addressing the interconnected concerns of the natural environme...","Hahn et al., Baumgartner and Rauter, and CrisÃ³stomo et al. collectively emphasize the multifaceted approach required to enhance corporate sustainability performance. Hahn et al. highlight the imp...",0.3,0.37,0.27,0.571429


In [11]:
# Mean of each FactualCorrectness score over all evaluated questions, rounded to 4 decimals.
factual_columns = [
    'FactualCorrectness_Precision',
    'FactualCorrectness_Recall',
    'FactualCorrectness_F1',
]
column_means = df_rag[factual_columns].mean()
average_metrics = column_means.round(4)

print("Average for each metric:")
print(average_metrics)

Average for each metric:
FactualCorrectness_Precision    0.412
FactualCorrectness_Recall       0.262
FactualCorrectness_F1           0.322
dtype: float64


# GraphRAG

## Indexing

In [None]:
# GraphRAG initial setup: create the project root and an input folder, then run
# `graphrag init` with ./graphrag as the root.
# NOTE(review): `mkdir` reports an error when the directory already exists, so
# this cell is meant to be run once on a fresh checkout.
# NOTE(review): ./input_references is created next to ./graphrag, not inside it -
# confirm that the indexing settings point at this folder.
!mkdir ./graphrag
!mkdir ./input_references
!python -m graphrag init --root ./graphrag

In [None]:
# Build the GraphRAG index for the project rooted at ./graphrag.
# Only run this once, unless you want to update the index.
!python -m graphrag index --root ./graphrag


⠋ GraphRAG Indexer 
Logging enabled at 
E:\Repositories\graphrag-businessqa-evaluation\graphrag\logs\indexing-engine.lo
g
⠋ GraphRAG Indexer 
⠋ GraphRAG Indexer 
⠼ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 11 files loaded (0 filtered)  100%  0…
⠴ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 11 files loaded (0 filtered)  100%  0…
└── create_base_text_units
⠹ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 11 files loaded (0 filtered)  100%  0…
└── create_base_text_units
⠴ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 11 files loaded (0 filtered)  100%  0…
└── create_base_text_units
⠇ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 11 files loaded (0 filtered)  100%  0…
└── create_base_text_units
⠋ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 11 files loaded (0 filtered)  100%  0…
└── create_base_text_units
⠸ GraphRAG Indexer 
├── Loading Input (InputFileType.text) - 11 files loaded (0 filtered)  100%  0…
└─

## Query Engine

### Global Search

This section follows the implementation guide in the GraphRAG docs: https://microsoft.github.io/graphrag/examples_notebooks/global_search/

In [12]:
from graphrag_utils import setup_graphrag
import pandas as pd
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
import os
import json
import asyncio

api_key = os.environ["GRAPHRAG_API_KEY"]


llm = ChatOpenAI(
    api_key=api_key,
    model="gpt-4o-mini",
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)
community_level = 2

with open(DATASET, 'r') as file:
    data = json.load(file)
    
queries = [response['question'] for response in data['responses']]
references = [response['ground_truth'] for response in data['responses']]

search_engine_global = setup_graphrag(model_name, llm , community_level)

async def perform_global_search(query):
    print(f"Performing search with query: {query}")
    result = await search_engine_global.asearch(query)
    print(f"Result for query: {query} is: {result.response}")
    return result.response

tasks = [perform_global_search(query) for query in queries]
results = await asyncio.gather(*tasks)

evaluation_samples = []
for query, result, reference in zip(queries, results, references):
    sample = SingleTurnSample(
        user_input=query,
        response=result,
        reference=reference
    )
    evaluation_samples.append(sample)

evaluation_dataset = EvaluationDataset(samples=evaluation_samples)
#print(evaluation_dataset)

Missing reports for communities: [63, 40, 31, 37, 73, 42, 35, 27]


                                     id  human_readable_id  community  level  \
0  d8ce48b7-62e1-4a06-9606-a57f9660d3ff                242        242      3   
1  5484e83f-90b8-4341-bc4a-fa538a5c1578                243        243      3   
2  0d63a482-d083-454a-9e8d-2ca04b1c0550                244        244      3   
3  ac6a14b2-2f5c-4ea9-a149-d757991b4879                245        245      3   
4  c3b5057e-6556-4f1e-b68d-d5b40ea91daf                148        148      2   

                                                      title  \
0                    BM&FBOVESPA and Sustainability Indices   
1  Dow Jones Sustainability Index and S&P Dow Jones Indices   
2             Corporate Sustainability Management Community   
3              FSSD and Corporate Sustainability Management   
4           ISE Sustainability Index and Advisory Committee   

                                                                                                                                            

In [13]:
# Show the dataset summary (feature names and number of samples) as a sanity check.
print(evaluation_dataset)


EvaluationDataset(features=['user_input', 'response', 'reference'], len=5)


In [14]:
# The proxy chat model doubles as the judge LLM for the Ragas metrics.
# No evaluator embeddings are created in this cell.
evaluator_llm = LangchainLLMWrapper(llm_sap)

# FactualCorrectness is evaluated once per mode; the second element is the
# column name the score gets in the result table.
# Faithfulness is left out: the samples in the dataset evaluated here are
# built without retrieved_contexts.
factual_correctness_variants = (
    ("precision", "FactualCorrectness_Precision"),
    ("recall", "FactualCorrectness_Recall"),
    ("f1", "FactualCorrectness_F1"),
)
metrics = [
    FactualCorrectness(llm=evaluator_llm, mode=mode, name=column_name)
    for mode, column_name in factual_correctness_variants
]

results = evaluate(dataset=evaluation_dataset, metrics=metrics)
df_grag = results.to_pandas()



Evaluating: 100%|██████████| 15/15 [01:25<00:00,  5.73s/it]


In [15]:
# Widen text columns so more of each response is visible (pandas default is 50, None means unlimited).
pd.options.display.max_colwidth = 500

df_grag

Unnamed: 0,user_input,response,reference,FactualCorrectness_Precision,FactualCorrectness_Recall,FactualCorrectness_F1
0,"How can a company strategically integrate a Corporate Sustainability Management System (CSMS) with a Green Shared Vision and a comprehensive evaluation framework to simultaneously enhance environmental conservation, improve financial performance, and foster stronger stakeholder relationships, while addressing specific challenges such as green confusion, social sustainability, and the impact of external factors like climate vulnerability and pandemics?","## Strategic Integration of CSMS with Green Shared Vision\n\nTo effectively integrate a Corporate Sustainability Management System (CSMS) with a Green Shared Vision (GSV), companies must ensure that their sustainability goals are aligned with their core business strategies. This alignment is crucial as it embeds sustainability into the organizational culture, leading to improved environmental conservation efforts and enhanced financial performance. A CSMS provides a structured approach that ...","Endiana et al. assert that the accounting sector of a company can foster environmental conservation and enhance performance through the implementation of a Corporate Sustainability Management System (CSMS), by appropriately allocating environmental costs, thereby improving financial outcomes and customer loyalty. From the employees' perspective, Chang et al. emphasize the importance of establishing a solid Green Shared Vision to mitigate green confusion and boost economic performance, highli...",0.08,0.45,0.33
1,"How can multinational corporations effectively integrate sustainable human resource management practices with innovative environmental strategies and diverse team dynamics to enhance their overall corporate sustainability performance, while simultaneously addressing socio-environmental, socio-economical, and eco-efficiency dimensions across different global markets?","# Integrating Sustainable HRM with Environmental Strategies and Diversity\n\nMultinational corporations (MNCs) face the challenge of integrating sustainable human resource management (HRM) practices with innovative environmental strategies and diverse team dynamics to enhance their overall corporate sustainability performance. This integration is essential for addressing the socio-environmental, socio-economical, and eco-efficiency dimensions across different global markets.\n\n## Sustainabl...","Mazur and Walczynia, in alignment with Chang et al., emphasize the importance of implementing sustainable management of human resources (SMHR) when a company has a robust corporate sustainability perspective, ensuring that current needs are met without jeopardizing future stakeholders' needs. Complementing this, Xia et al. identify practices that enhance corporate sustainability performance across socio-environmental, socio-economical, and eco-efficiency dimensions by fostering environmental...",0.23,0.25,0.3
2,"How can multinational corporations effectively integrate Global Reporting Initiative (GRI) indicators to achieve corporate sustainability, considering the diverse interpretations of sustainability across different countries, while also leveraging strategic management frameworks to balance the creation of value for business, society, and nature in their operations and product offerings?",# Integrating GRI Indicators for Corporate Sustainability\n\nMultinational corporations (MNCs) face the challenge of effectively integrating Global Reporting Initiative (GRI) indicators into their sustainability strategies while navigating the diverse interpretations of sustainability across different cultural and regulatory contexts. This integration is essential for aligning corporate practices with sustainability goals and enhancing accountability and transparency in reporting.\n\n## Unde...,"In the context of growing global concern for companies to align their corporate sustainability goals with international standards, it is proposed by Zhang et al. that firms utilize indicators from the Global Reporting Initiative (GRI) for sustainability disclosure. However, they highlight persistent challenges due to varying interpretations of corporate sustainability across different countries. Complementing this view, Weber and Chowdury emphasize the importance of evaluating corporate sust...",0.31,0.6,0.22
3,"How can firms globally enhance their corporate sustainability efforts by integrating comprehensive criteria across various business domains, such as governance, social responsibility, and supply chain management, while addressing contemporary challenges like climate vulnerability and the COVID-19 pandemic, as suggested by the categorization framework of Ikram et al. and the supply chain integration approach of Kafa et al.?","# Enhancing Corporate Sustainability Through Integrated Approaches\n\nFirms globally can significantly enhance their corporate sustainability efforts by adopting a comprehensive framework that integrates criteria across governance, social responsibility, and supply chain management. This holistic approach is essential for addressing contemporary challenges, including climate vulnerability and the impacts of the COVID-19 pandemic.\n\n## Integration of Governance, Social Responsibility, and Su...","Ikram et al. emphasize the necessity for more categories to accurately assess specific aspects of firms, particularly focusing on social sustainability, by proposing nine distinct categories: Corporate Governance, Product Responsibility, Transparency and Communication, Economic, Environmental, Social, Natural Environment and Climate Vulnerability, Energy Consumption, and the impact of the COVID-19 pandemic. Complementing this, Kafa et al. highlight the importance for companies to develop sup...",0.41,0.53,0.42
4,"How can corporations integrate diverse team dynamics, strategic management, and key financial determinants to create a holistic and effective sustainability strategy that enhances value for businesses, society, and the environment, while mitigating sector-specific risks and leveraging growth opportunities?","# Integrating Diverse Team Dynamics, Strategic Management, and Financial Determinants for Sustainability\n\nCorporations seeking to develop a holistic and effective sustainability strategy must focus on the integration of diverse team dynamics, strategic management, and key financial determinants. This multifaceted approach not only enhances value for businesses but also benefits society and the environment while addressing sector-specific risks and leveraging growth opportunities.\n\n## The...","Hahn et al., Baumgartner and Rauter, and CrisÃ³stomo et al. collectively emphasize the multifaceted approach required to enhance corporate sustainability performance. Hahn et al. highlight the importance of diverse team settings, suggesting that a mix of business case-minded individuals and those with a strong awareness of environmental and social issues can lead to more successful sustainability outcomes than teams dominated by either perspective alone. Complementing this, Baumgartner and R...",0.29,0.53,0.4


In [16]:
# Mean FactualCorrectness scores (rounded to 4 decimals) for both pipelines,
# printed one after the other for comparison.
factual_columns = [
    'FactualCorrectness_Precision',
    'FactualCorrectness_Recall',
    'FactualCorrectness_F1',
]
average_metrics_grag = df_grag[factual_columns].mean().round(4)
average_metrics_rag = df_rag[factual_columns].mean().round(4)

for heading, averages in (
    ("Average for each metric (GraphRAG):", average_metrics_grag),
    ("Average for each metric (RAG):", average_metrics_rag),
):
    print(heading)
    print(averages)


Average for each metric (GraphRAG):
FactualCorrectness_Precision    0.264
FactualCorrectness_Recall       0.472
FactualCorrectness_F1           0.334
dtype: float64
Average for each metric (RAG):
FactualCorrectness_Precision    0.412
FactualCorrectness_Recall       0.262
FactualCorrectness_F1           0.322
dtype: float64
