In [1]:
import pandas as pd
import os
from dotenv import load_dotenv
from ragas import evaluate
from datasets import Dataset
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

In [2]:
load_dotenv()
OPENAI_KEY=os.environ["OPENAI_API"]

In [3]:
df = pd.read_csv('../test-dataset/dataset_with_answers.csv')
df.head()

Unnamed: 0,question,contexts,ground_truth,evolution_type,episode_done,answer
0,What is the purpose of developing a user-frien...,"[""\n\nIterate on LLMs faster: Measure LLM qual...",The purpose of developing a user-friendly inte...,simple,True,The purpose of developing a user-friendly int...
1,What is the importance of automated prompt gen...,['Yabebal - Emitinan - Rehmet\n\nBadges\n\nEac...,The importance of automated prompt generation ...,simple,True,Automated prompt generation is important in b...
2,What are the key developments in automatic pro...,['Yabebal - Emitinan - Rehmet\n\nBadges\n\nEac...,The key developments in automatic prompt engin...,simple,True,\n\nSome key developments in automatic prompt ...
3,What is the purpose of RAG-based LLM Applicati...,"[""\n\nIterate on LLMs faster: Measure LLM qual...",The purpose of RAG-based LLM Applications is t...,simple,True,The purpose of RAG-based LLM Applications is ...
4,What is the importance of prompt engineering i...,['10 Academy Cohort A\n\nWeekly Challenge: Wee...,Prompt engineering is important in optimizing ...,simple,True,\n\nPrompt engineering is crucial in optimizin...


In [4]:
df['contexts'] = df['contexts'].apply(lambda x: [x])

In [5]:
dataset = Dataset.from_pandas(df)

In [6]:
result = evaluate(
    dataset=dataset, 
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    ],
)

evaluation_df = result.to_pandas()

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

In [7]:
evaluation_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,episode_done,answer,context_precision,context_recall,faithfulness,answer_relevancy
0,What is the purpose of developing a user-frien...,"[[""\n\nIterate on LLMs faster: Measure LLM qua...",The purpose of developing a user-friendly inte...,simple,True,The purpose of developing a user-friendly int...,1.0,1.0,,1.0
1,What is the importance of automated prompt gen...,[['Yabebal - Emitinan - Rehmet\n\nBadges\n\nEa...,The importance of automated prompt generation ...,simple,True,Automated prompt generation is important in b...,1.0,1.0,0.166667,0.988032
2,What are the key developments in automatic pro...,[['Yabebal - Emitinan - Rehmet\n\nBadges\n\nEa...,The key developments in automatic prompt engin...,simple,True,\n\nSome key developments in automatic prompt ...,1.0,1.0,0.0,0.976576
3,What is the purpose of RAG-based LLM Applicati...,"[[""\n\nIterate on LLMs faster: Measure LLM qua...",The purpose of RAG-based LLM Applications is t...,simple,True,The purpose of RAG-based LLM Applications is ...,1.0,1.0,1.0,1.0
4,What is the importance of prompt engineering i...,[['10 Academy Cohort A\n\nWeekly Challenge: We...,Prompt engineering is important in optimizing ...,simple,True,\n\nPrompt engineering is crucial in optimizin...,1.0,1.0,1.0,0.985502
5,What's involved in the automated prompt genera...,[['Yabebal - Emitinan - Rehmet\n\nBadges\n\nEa...,The automated prompt generation and evaluation...,multi_context,True,\nThe automated prompt generation and evaluati...,1.0,1.0,1.0,0.969021
6,What's involved in the automated prompt genera...,[['Yabebal - Emitinan - Rehmet\n\nBadges\n\nEa...,The automated prompt generation and evaluation...,multi_context,True,\nThe automated prompt generation and evaluati...,1.0,1.0,1.0,0.950942
7,Why is a user-friendly interface important in ...,"[[""\n\nIterate on LLMs faster: Measure LLM qua...",A user-friendly interface is important in the ...,multi_context,True,\nA user-friendly interface is important in th...,1.0,1.0,0.6,0.952627
8,Why is a user-friendly interface important in ...,"[[""\n\nIterate on LLMs faster: Measure LLM qua...",A user-friendly interface is important in the ...,multi_context,True,\nA user-friendly interface is important in th...,1.0,1.0,0.6,0.952586
9,What are the three key services provided by Pr...,[['10 Academy Cohort A\n\nWeekly Challenge: We...,The three key services provided by PromptlyTec...,reasoning,True,\n\nThe three key services provided by Promptl...,1.0,1.0,0.0,0.957671


In [8]:
evaluation_df.to_csv('../test-dataset/evaluation_result.csv', index=False)