In [None]:
!pip install ragas==0.1.0
!pip install ipywidgets

In [1]:
from datasets import Dataset
from dotenv import load_dotenv
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_relevancy,
    context_recall,
    answer_similarity,
    answer_correctness,
)
import os
import pandas as pd

In [2]:
# Load variables from a .env file into the process environment so that the
# OPENAI_API_KEY lookup in the next cell can find the key.
load_dotenv()

True

In [3]:
# Read the OpenAI key from the environment; this is None when the variable is unset.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [4]:
# Load the evaluation set from multi_LLM.csv.
# Swap the path for 'single_LLM.csv' to score the other run instead.
df = pd.read_csv(filepath_or_buffer='multi_LLM.csv')

In [5]:
def _wrap_context(value):
    """Wrap a bare string in a one-element list; return any other value unchanged."""
    if isinstance(value, str):
        return [value]
    return value


# Values read from the CSV arrive as plain strings; wrap each one so every
# 'contexts' cell holds a list.
df['contexts'] = df['contexts'].apply(_wrap_context)

In [6]:
# Build the Hugging Face dataset with the DataFrame-specific constructor rather
# than from_dict (which is meant for plain dicts of columns).
# preserve_index=False keeps the pandas index from being added as an extra column.
eval_data = Dataset.from_pandas(df, preserve_index=False)
eval_data

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 20
})

In [7]:
# Show the prepared frame (after the 'contexts' column has been wrapped) for a visual check.
df

Unnamed: 0,question,ground_truth,answer,contexts
0,Good place to send money home?,Automated Remittance Kiosk Use authorised remi...,A good place to send money home is through aut...,[{'money home through WhatsApp or any instant ...
1,Who can help me if I cannot remit money back h...,If you are unable to send money home on your o...,"If you are unable to remit money back home, yo...",[{'Will the Police be able to recover my money...
2,"If I am sick, what doctor can I go to?",You may go to any healthcare providers under t...,"If you are feeling sick, you can go to the MOM...",[{'4. FAQs for PCP -eligible Workers ...........
3,How to make claims for my workplace injury tre...,For temporary incapacity cases - report the ac...,To make a claim for your workplace injury trea...,[{'COMPENSATION CLAIM PROCESS\nYour employee h...
4,How do I renew my work permit?,To renew your work permit kindly notify your e...,"To renew your work permit, you will need to fo...",[{'of your card.Should your employer cancel yo...
5,Will I be covered by insurance by my employer?,You will be entitled to medical insurance that...,"Yes, as an employee in Singapore, you will be ...","[{'certificates, medical bills) and hand the o..."
6,Where are some places I can access support and...,You can contact the Migrant Workers’ Centre (M...,You can access support and help from the follo...,[{'This section of the guide contains contact ...
7,Where do I go if I need access to legal servic...,"If you require legal assistance or services, y...","If you need access to legal services, you can ...","[{'Resource Guide\nBrought to you by:', 'WHERE..."
8,What should I do if my employer deducted my sa...,Your employer can only deduct due to the follo...,If your employer deducted your salary without ...,[{'You gave written consent \nto the deduction...
9,Can I do other jobs if requested by my employer?,You must only work in the occupation and for t...,"No, you are not allowed to work in another occ...",[{'14\n(b) Salary Deductions\n• Your employ...


In [8]:
# Run the ragas evaluation with all seven imported metrics over eval_data and
# convert the returned result object to a pandas DataFrame in one step.
result = evaluate(
    metrics=[
        faithfulness, answer_relevancy,
        context_precision, context_relevancy, context_recall,
        answer_similarity, answer_correctness,
    ],
    dataset=eval_data,
).to_pandas()

Evaluating:   0%|          | 0/140 [00:00<?, ?it/s]

In [9]:
# Column-wise mean of every metric column across all evaluated rows.
result.loc[
    :,
    [
        'faithfulness',
        'answer_relevancy',
        'context_precision',
        'context_relevancy',
        'context_recall',
        'answer_similarity',
        'answer_correctness',
    ],
].mean()

faithfulness          0.894737
answer_relevancy      0.942598
context_precision     0.750000
context_relevancy     0.336707
context_recall        0.720833
answer_similarity     0.896103
answer_correctness    0.359467
dtype: float64

In [10]:
# Store the average under a distinct name: assigning to `context_recall` would
# overwrite the imported ragas metric object and break a re-run of the
# evaluate cell, which passes that name in its metrics list.
mean_context_recall = result['context_recall'].mean()
mean_context_recall

0.7208333333333333

In [11]:
# Store the average under a distinct name: assigning to `context_precision`
# would overwrite the imported ragas metric object and break a re-run of the
# evaluate cell, which passes that name in its metrics list.
mean_context_precision = result['context_precision'].mean()
mean_context_precision

0.749999999925

In [12]:
# Store the average under a distinct name: assigning to `faithfulness` would
# overwrite the imported ragas metric object and break a re-run of the
# evaluate cell, which passes that name in its metrics list.
mean_faithfulness = result['faithfulness'].mean()
mean_faithfulness

0.8947368421052632

In [13]:
# Store the average under a distinct name: assigning to `answer_relevancy`
# would overwrite the imported ragas metric object and break a re-run of the
# evaluate cell, which passes that name in its metrics list.
mean_answer_relevancy = result['answer_relevancy'].mean()
mean_answer_relevancy

0.9425982971954096

In [14]:
# Store the average under a distinct name: assigning to `answer_similarity`
# would overwrite the imported ragas metric object and break a re-run of the
# evaluate cell, which passes that name in its metrics list.
mean_answer_similarity = result['answer_similarity'].mean()
mean_answer_similarity

0.8961034764303912

In [None]:
# Persist the per-row scores. The output name must match the input loaded at
# the top of the notebook (multi_LLM.csv); writing to single_LLM_ragas.csv here
# would overwrite the other run's results with these scores.
# result.to_csv('single_LLM_ragas.csv', index=False)
result.to_csv('multi_LLM_ragas.csv', index=False)