In [1]:
# !pip install -q ragas datasets

# 1. Imports

In [None]:
import os
import json
import random
import pandas as pd

from ragas import evaluate
from ragas.metrics import (answer_relevancy, faithfulness, context_recall, context_precision,)

from langchain.chat_models import ChatOpenAI #depracated
from langchain_openai import ChatOpenAI #updated

from datasets import load_dataset
from datasets import Dataset


from dotenv import load_dotenv

# Load environment variables defined in .env
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

# 2 - Load a Test Dataset from Hugging Face (explodinggradients/fiqa)

For this tutorial, we will use an example dataset from one of the baselines created for the Financial Opinion Mining and Question Answering (FiQA) dataset. The dataset includes the following columns:

- **question**: `list[str]` - These are the questions your Retrieval-Augmented Generation (RAG) pipeline will be evaluated on.
- **answer**: `list[str]` - The answers generated by the RAG pipeline and provided to the user.
- **contexts**: `list[list[str]]` - The contexts that were passed into the Large Language Model (LLM) to answer the questions.
- **ground_truths**: `list[list[str]]` - The ground truth answers to the questions. (Only required if you are using context recall)


In [3]:
# Load the dataset and we will only work on the first 10 questions
fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval")
fiqa_eval_5_questions = fiqa_eval["baseline"].select(range(5))

# Show the dataset definition in terms of features and rows
print(fiqa_eval_5_questions)

# Show the dataset content
for item in fiqa_eval_5_questions:
    print(item)

Dataset({
    features: ['question', 'ground_truths', 'answer', 'contexts'],
    num_rows: 5
})
{'question': 'How to deposit a cheque issued to an associate in my business into my business account?', 'ground_truths': ["Have the check reissued to the proper payee.Just have the associate sign the back and then deposit it.  It's called a third party cheque and is perfectly legal.  I wouldn't be surprised if it has a longer hold period and, as always, you don't get the money if the cheque doesn't clear. Now, you may have problems if it's a large amount or you're not very well known at the bank.  In that case you can have the associate go to the bank and endorse it in front of the teller with some ID.  You don't even technically have to be there.  Anybody can deposit money to your account if they have the account number. He could also just deposit it in his account and write a cheque to the business."], 'answer': '\nThe best way to deposit a cheque issued to an associate in your business in

# 3. Evaluate some models using RAGAS

In [7]:
# ---------------------------
# Evaluate using GPT-4o-mini
# ---------------------------
llm_gpt4 = ChatOpenAI(
    model="gpt-4o-mini",  # Updated model name for GPT-4o-mini
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2
)

# Generate answers for each question using GPT-4o-mini
answers_gpt4 = []
for item in fiqa_eval_5_questions:
    answer = llm_gpt4.invoke(item['question']).content
    answers_gpt4.append(answer)

print("GPT-4o-mini Generated Answers:")
print(answers_gpt4)

# Prepare dataset dictionary for GPT-4o-mini evaluation.
dataset_dict_gpt4 = {
    'question': [item["question"] for item in fiqa_eval_5_questions],
    'answer': answers_gpt4,
    'contexts': [item["contexts"] for item in fiqa_eval_5_questions],
    'ground_truths': [item["ground_truths"] for item in fiqa_eval_5_questions],
    # Join ground truths if they are lists
    'reference': [
        ", ".join(item["ground_truths"]) if isinstance(item["ground_truths"], list) 
        else item["ground_truths"]
        for item in fiqa_eval_5_questions
    ]
}

ragas_dataset_gpt4 = Dataset.from_dict(dataset_dict_gpt4)

# Evaluate with the defined metrics
result_gpt4 = evaluate(
    ragas_dataset_gpt4,
    metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
)

# Convert evaluation results to a DataFrame and add a column for model name.
df_gpt4 = result_gpt4.to_pandas()
df_gpt4["model"] = "gpt-4o-mini"

# ---------------------------
# Evaluate using GPT-3.5-turbo
# ---------------------------
llm_gpt35 = ChatOpenAI(
    model="gpt-3.5-turbo",  # Using GPT-3.5-turbo for the second evaluation
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2
)

answers_gpt35 = []
for item in fiqa_eval_5_questions:
    answer = llm_gpt35.invoke(item['question']).content
    answers_gpt35.append(answer)

print("GPT-3.5-turbo Generated Answers:")
print(answers_gpt35)

dataset_dict_gpt35 = {
    'question': [item["question"] for item in fiqa_eval_5_questions],
    'answer': answers_gpt35,
    'contexts': [item["contexts"] for item in fiqa_eval_5_questions],
    'ground_truths': [item["ground_truths"] for item in fiqa_eval_5_questions],
    'reference': [
        ", ".join(item["ground_truths"]) if isinstance(item["ground_truths"], list) 
        else item["ground_truths"]
        for item in fiqa_eval_5_questions
    ]
}

ragas_dataset_gpt35 = Dataset.from_dict(dataset_dict_gpt35)

result_gpt35 = evaluate(
    ragas_dataset_gpt35,
    metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
)

df_gpt35 = result_gpt35.to_pandas()
df_gpt35["model"] = "gpt-3.5-turbo"

# ---------------------------
# Combine evaluation results
# ---------------------------
df_combined = pd.concat([df_gpt4, df_gpt35], ignore_index=True)
print("Combined Evaluation Results:")
print(df_combined.head())

GPT-4o-mini Generated Answers:
['Depositing a cheque issued to an associate in your business into your business account typically involves a few steps. However, it\'s important to ensure that you follow the proper procedures to avoid any issues. Here’s a general guide:\n\n1. **Endorse the Cheque**: The associate who received the cheque must endorse it. This usually involves signing the back of the cheque. They may also need to write "Pay to the order of [Your Business Name]" below their signature to indicate that the cheque is being transferred to your business.\n\n2. **Obtain Permission**: Ensure that you have the associate\'s permission to deposit the cheque into your business account. This is important for legal and ethical reasons.\n\n3. **Prepare a Deposit Slip**: Fill out a deposit slip for your business account. Include the cheque amount and any other required information.\n\n4. **Visit Your Bank**: Take the endorsed cheque and the completed deposit slip to your bank. You can do

Evaluating: 100%|██████████| 20/20 [01:06<00:00,  3.34s/it]


GPT-3.5-turbo Generated Answers:
['To deposit a cheque issued to an associate in your business into your business account, you can follow these steps:\n\n1. Endorse the cheque: The associate should endorse the back of the cheque by signing their name. They can also write "Pay to the order of [Your Business Name]" to transfer the cheque to your business.\n\n2. Visit the bank: Take the endorsed cheque to your bank branch. You may also be able to deposit the cheque using a mobile banking app or an ATM, depending on your bank\'s policies.\n\n3. Fill out a deposit slip: If you are depositing the cheque at a bank branch, fill out a deposit slip with your business account information and the amount of the cheque.\n\n4. Submit the cheque: Give the cheque and deposit slip to the bank teller or follow the instructions on the ATM or mobile banking app to deposit the cheque.\n\n5. Wait for the funds to clear: It may take a few business days for the funds from the cheque to be available in your bus

Evaluating: 100%|██████████| 20/20 [00:25<00:00,  1.28s/it]


Combined Evaluation Results:
                                          user_input  \
0  How to deposit a cheque issued to an associate...   
1  Can I send a money order from USPS as a business?   
2  1 EIN doing business under multiple business n...   
3         Applying for and receiving business credit   
4               401k Transfer After Business Closure   

                                  retrieved_contexts  \
0  [Just have the associate sign the back and the...   
1  [Sure you can.  You can fill in whatever you w...   
2  [You're confusing a lot of things here. Compan...   
3  [Set up a meeting with the bank that handles y...   
4  [The time horizon for your 401K/IRA is essenti...   

                                            response  \
0  Depositing a cheque issued to an associate in ...   
1  Yes, you can send a money order from the Unite...   
2  An Employer Identification Number (EIN) is a u...   
3  Applying for and receiving business credit is ...   
4  Transferring a