In [1]:
import requests
import gdown
import tarfile
from bs4 import BeautifulSoup
import json
import time
import random
from tqdm import tqdm
from rich.pretty import pprint
import os

import collections
import re
import string
import unicodedata

from datasets import Dataset
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceEmbeddings

from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    context_relevancy,
    answer_correctness,
    answer_similarity
)

from ragas.metrics.critique import harmfulness
import ragas.evaluation as ragas_eval

from IPython.display import Markdown, display
import pickle
import pandas as pd

# The Groq credential is read from the environment rather than written in the notebook.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Chat model that is later handed to the ragas evaluation as its LLM.
llm = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=GROQ_API_KEY,
    temperature=0,
)

# Embedding model built with the library defaults; also handed to ragas.
embedder = HuggingFaceEmbeddings()

  from .autonotebook import tqdm as notebook_tqdm


# 1. Dataset Format

In [2]:
def extract_evidence_and_answer(text):
    """Split a RAG response into its ``Evidence`` and ``Answer`` sections.

    Two patterns are tried in order:

    1. A strict pattern for text that still carries the tail of a stringified
       ``(response, ChatCompletion)`` tuple: ``Evidence`` and ``Answer`` must
       be separated by at least one newline, and the answer stops right before
       the closing quote (``'`` or ``"``) that precedes ``, ChatCompletion``.
    2. A loose fallback where the answer simply runs to the end of the text.

    Args:
        text: Raw response text containing ``Evidence:`` and ``Answer:`` markers.

    Returns:
        A dict ``{"evidence": str, "answer": str}`` with both values stripped,
        or ``None`` (after printing an error message) when ``text`` is not a
        string or the two sections cannot be located.
    """
    # The lookahead anchors directly on the closing quote. The previous version
    # required one extra wildcard character before the quote, which silently
    # dropped the last character of the answer ("Sim" -> "Si").
    strict_pattern = r"Evidence:\s*(.*?)\s*\n+\s*Answer:\s*(.*?)(?=['\"], ChatCompletion)"
    # Less specific fallback: tolerant about whitespace, answer goes to the end.
    loose_pattern = r"Evidence:\s*(.*?)\s*Answer:\s*(.*)"

    try:
        matches = (
            re.search(strict_pattern, text, re.DOTALL)
            or re.search(loose_pattern, text, re.DOTALL)
        )
    except TypeError as e:
        # Non-string input (e.g. None): keep the best-effort contract and
        # report the problem instead of raising.
        print(f"Erro ao extrair os componentes: {e}")
        return None

    if not matches:
        print("Erro ao extrair os componentes: As seções 'Evidence' e 'Answer' não puderam ser encontradas no texto fornecido.")
        return None

    return {
        "evidence": matches.group(1).strip(),
        "answer": matches.group(2).strip()
    }

In [3]:
# Location of the pickled RAG answers for experiment 6.
answers_pickle_path = "../input/Raw Text/Rag Data Experiments/experiment_6/respostas_obtidas.pickle_9"

# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
with open(answers_pickle_path, mode='rb') as input_file:
    # read_pickle returns whatever object was pickled; later cells iterate it
    # as a sequence of dict-like samples.
    dataset = pd.read_pickle(input_file)

In [4]:
# Reformat every sample of the loaded data into question / ground truth / answer / contexts records.
# NOTE(review): ragas warns (see the evaluation output) that 'ground_truths' is deprecated in
# favour of a plain-string 'ground_truth' column; the key is kept so the saved JSON schema is unchanged.
dataset_organized = []
for sample in dataset:
    # Skip samples without a RAG response stored as a tuple.
    if 'resposta_obtida' not in sample or not isinstance(sample['resposta_obtida'], tuple):
        continue

    # Only the first tuple element holds the Evidence/Answer text; the rest
    # (the ChatCompletion object) is deliberately ignored.
    text = sample['resposta_obtida'][0]
    extracted_data = extract_evidence_and_answer(text)  # dict with 'evidence' and 'answer', or None

    # Extraction failed (None returned): leave this sample out.
    if not extracted_data:
        continue

    organized_sample = dict(
        question=sample['pergunta'],                  # FAQ question
        ground_truths=[sample['resposta_esperada']],  # expected FAQ answer
        answer=extracted_data['answer'],              # answer produced by the RAG
        contexts=[extracted_data['evidence']],        # evidence quoted by the RAG
    )
    dataset_organized.append(organized_sample)

In [5]:
# Persist the organized dataset as UTF-8 JSON (non-ASCII characters kept as-is).
file_path = '../input/Raw Text/Rag Data Experiments/experiment_6/RAG_dataset_IIRCformat.json'
with open(file_path, mode='w', encoding='utf-8') as file:
    json.dump(obj=dataset_organized, fp=file, indent=4, ensure_ascii=False)

# Confirmation message with the destination path.
print(f'Dataset organizado salvo com sucesso em {file_path}!')

Dataset organizado salvo com sucesso em ../input/Raw Text/Rag Data Experiments/experiment_6/RAG_dataset_IIRCformat.json!


# 2. RAGAS Implementation

In [6]:
from datasets import Dataset
import time

def evaluate_per_sample(dataset_organized, sleep_seconds=60):
    """Score every sample with ragas, issuing one ``evaluate`` call per sample.

    Each sample is evaluated on ``answer_relevancy``, ``faithfulness``,
    ``context_recall`` and ``context_precision`` using the module-level
    ``llm`` and ``embedder``. The scores are written back into the sample
    dicts, so the input list is mutated in place; samples already processed
    keep their scores even if a later evaluation raises.

    Args:
        dataset_organized: List of sample dicts (question, ground truths,
            answer, contexts). Updated in place with the four metric scores.
        sleep_seconds: Pause, in seconds, between consecutive evaluations.
            Defaults to 60, the delay that was previously hard-coded. Use 0
            to disable throttling.

    Returns:
        The same ``dataset_organized`` list, with the metric keys added to
        every sample.
    """
    # Nothing to score: avoid building an empty Dataset.
    if not dataset_organized:
        return dataset_organized

    # Build a Dataset for use with the 'ragas' library.
    formated_dataset = Dataset.from_list(dataset_organized)

    metrics = [answer_relevancy, faithfulness, context_recall, context_precision]
    metric_names = ('answer_relevancy', 'faithfulness', 'context_recall', 'context_precision')
    total = len(dataset_organized)

    for i, sample in enumerate(dataset_organized):
        # Select each sample individually for evaluation.
        chunk = formated_dataset.select([i])

        # Evaluate the single-sample dataset with the metrics defined above.
        result = ragas_eval.evaluate(dataset=chunk, metrics=metrics, llm=llm, embeddings=embedder)

        # Store the metric scores on the sample dict itself.
        sample.update({name: result[name] for name in metric_names})

        print(f"Evaluating sample {i + 1}/{total}: {result}")

        # Throttle between evaluations (presumably to stay within the LLM
        # provider's rate limits). There is nothing to wait for after the
        # last sample, so the final pause is skipped.
        if sleep_seconds and sleep_seconds > 0 and i + 1 < total:
            time.sleep(sleep_seconds)

    return dataset_organized

In [7]:
# Run the per-sample ragas evaluation; `dataset_organized` must already be
# defined and populated by the formatting cell before this call.
updated_dataset = evaluate_per_sample(dataset_organized=dataset_organized)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`


Evaluating: 100%|██████████| 4/4 [00:51<00:00, 12.76s/it]


Evaluating sample 1/40: {'answer_relevancy': 0.6764, 'faithfulness': 1.0000, 'context_recall': 0.0000, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:02<00:00,  1.66it/s]


Evaluating sample 2/40: {'answer_relevancy': 0.5130, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:25<00:00,  6.41s/it]


Evaluating sample 3/40: {'answer_relevancy': 0.4271, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.09it/s]


Evaluating sample 4/40: {'answer_relevancy': 0.2616, 'faithfulness': 1.0000, 'context_recall': 0.2500, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.03s/it]


Evaluating sample 5/40: {'answer_relevancy': 0.7513, 'faithfulness': 1.0000, 'context_recall': 0.6667, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.17s/it]


Evaluating sample 6/40: {'answer_relevancy': 0.4649, 'faithfulness': 0.8333, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:09<00:00,  2.49s/it]


Evaluating sample 7/40: {'answer_relevancy': 0.3452, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.09s/it]


Evaluating sample 8/40: {'answer_relevancy': 0.3483, 'faithfulness': 1.0000, 'context_recall': 0.6364, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:06<00:00,  1.56s/it]


Evaluating sample 9/40: {'answer_relevancy': 0.6970, 'faithfulness': 1.0000, 'context_recall': 0.3333, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.09s/it]


Evaluating sample 10/40: {'answer_relevancy': 0.1314, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.04s/it]


Evaluating sample 11/40: {'answer_relevancy': 0.2110, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:20<00:00,  5.04s/it]


Evaluating sample 12/40: {'answer_relevancy': 0.3347, 'faithfulness': 0.2000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:02<00:00,  1.70it/s]


Evaluating sample 13/40: {'answer_relevancy': 0.3893, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.28it/s]


Evaluating sample 14/40: {'answer_relevancy': 0.5021, 'faithfulness': 1.0000, 'context_recall': 0.4000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:08<00:00,  2.15s/it]


Evaluating sample 15/40: {'answer_relevancy': 0.3725, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:05<00:00,  1.27s/it]


Evaluating sample 16/40: {'answer_relevancy': 0.0000, 'faithfulness': 0.0000, 'context_recall': 0.6667, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:10<00:00,  2.54s/it]


Evaluating sample 17/40: {'answer_relevancy': 0.4870, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:02<00:00,  1.48it/s]


Evaluating sample 18/40: {'answer_relevancy': 0.1567, 'faithfulness': 1.0000, 'context_recall': 0.3333, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:09<00:00,  2.35s/it]


Evaluating sample 19/40: {'answer_relevancy': 0.2277, 'faithfulness': 1.0000, 'context_recall': 0.5000, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:05<00:00,  1.41s/it]


Evaluating sample 20/40: {'answer_relevancy': 0.2361, 'faithfulness': 1.0000, 'context_recall': 0.0000, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:05<00:00,  1.36s/it]


Evaluating sample 21/40: {'answer_relevancy': 0.4368, 'faithfulness': 0.6667, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.03it/s]


Evaluating sample 22/40: {'answer_relevancy': 0.1847, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:05<00:00,  1.33s/it]


Evaluating sample 23/40: {'answer_relevancy': 0.4654, 'faithfulness': 1.0000, 'context_recall': 0.0000, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:05<00:00,  1.27s/it]


Evaluating sample 24/40: {'answer_relevancy': 0.6888, 'faithfulness': 1.0000, 'context_recall': 0.2500, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:11<00:00,  2.80s/it]


Evaluating sample 25/40: {'answer_relevancy': 0.4201, 'faithfulness': 0.5000, 'context_recall': 0.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:02<00:00,  1.50it/s]


Evaluating sample 26/40: {'answer_relevancy': 0.5073, 'faithfulness': 1.0000, 'context_recall': 0.0000, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.11it/s]


Evaluating sample 27/40: {'answer_relevancy': 0.1404, 'faithfulness': 1.0000, 'context_recall': 0.0000, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.31it/s]


Evaluating sample 28/40: {'answer_relevancy': 0.2770, 'faithfulness': 1.0000, 'context_recall': 0.6667, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:02<00:00,  1.63it/s]


Evaluating sample 29/40: {'answer_relevancy': 0.3472, 'faithfulness': 1.0000, 'context_recall': 0.6667, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it]


Evaluating sample 30/40: {'answer_relevancy': 0.3163, 'faithfulness': 1.0000, 'context_recall': 0.6667, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.25s/it]


Evaluating sample 31/40: {'answer_relevancy': 0.3690, 'faithfulness': 1.0000, 'context_recall': 0.7500, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.11it/s]


Evaluating sample 32/40: {'answer_relevancy': 0.1689, 'faithfulness': 0.3333, 'context_recall': 0.3333, 'context_precision': 0.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:19<00:00,  4.97s/it]


Evaluating sample 33/40: {'answer_relevancy': 0.6626, 'faithfulness': 1.0000, 'context_recall': 0.6667, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.18it/s]


Evaluating sample 34/40: {'answer_relevancy': 0.3473, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:06<00:00,  1.51s/it]


Evaluating sample 35/40: {'answer_relevancy': 0.1170, 'faithfulness': 1.0000, 'context_recall': 0.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.08s/it]


Evaluating sample 36/40: {'answer_relevancy': 0.3371, 'faithfulness': 1.0000, 'context_recall': 0.5000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.19it/s]


Evaluating sample 37/40: {'answer_relevancy': 0.5795, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:31<00:00,  7.78s/it]


Evaluating sample 38/40: {'answer_relevancy': 0.5026, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:02<00:00,  1.51it/s]


Evaluating sample 39/40: {'answer_relevancy': 0.3023, 'faithfulness': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.09it/s]


Evaluating sample 40/40: {'answer_relevancy': 0.5839, 'faithfulness': 0.6667, 'context_recall': 1.0000, 'context_precision': 1.0000}


In [8]:
# Persist the metric-annotated dataset as UTF-8 JSON (non-ASCII characters kept as-is).
file_path = '../input/Raw Text/dataset/RAG_FinalDataset_experiment_6.json'
with open(file_path, mode='w', encoding='utf-8') as file:
    json.dump(obj=updated_dataset, fp=file, indent=5, ensure_ascii=False)

# Confirmation message with the destination path.
print(f'Dataset organizado salvo com sucesso em {file_path}!')

Dataset organizado salvo com sucesso em ../input/Raw Text/dataset/RAG_FinalDataset_experiment_6.json!


In [9]:
import pandas as pd
# Reload the JSON file written by the previous cell into a DataFrame and display it.
df = pd.read_json(path_or_buf=file_path)
df

Unnamed: 0,question,ground_truths,answer,contexts,answer_relevancy,faithfulness,context_recall,context_precision
0,Em quais hipóteses a Procuradoria Geral pode e...,[A Procuradoria Geral da UNICAMP integra a Adv...,Não há hipóteses estabelecidas para a Procurad...,[A Constituição Federal não estabelece hipótes...,0.676441,1.0,0.0,0.0
1,"Como ocorrem as atividades de cooperação, pesq...",[Nos termos do art. 1° da Deliberação CONSU-A-...,"Mediante prévia celebração de convênios, contr...","[De acordo com a Deliberação CONSU-A-016/2022,...",0.513008,1.0,1.0,1.0
2,Qual é o procedimento para a celebração de con...,"[Na Unicamp, a celebração de convênios, contra...","O procedimento para a celebração de convênios,...","[De acordo com a Deliberação CONSU-A-016/2022,...",0.427053,1.0,1.0,1.0
3,Qual é o sistema utilizado para a tramitação d...,[Os documentos essenciais estão elencados no a...,Processos administrativos eletrônicos.,"[De acordo com o trecho ""As propostas de convê...",0.26158,1.0,0.25,1.0
4,O que é o Plano de Aplicação de Recursos?,[O Plano de Aplicação de Recursos é o document...,O Plano de Aplicação de Recursos é um document...,[O Plano de Aplicação de Recursos é o document...,0.75134,1.0,0.666667,1.0
5,Quem pode ser executor de um convênio e quais ...,"[Nos termos do art. 18, §1° da Deliberação CON...",Os executores dos convênios são servidores ati...,[De acordo com a [Deliberação CONSU-A-016/2022...,0.464884,0.833333,1.0,1.0
6,Quem é a autoridade competente para assinatura...,"[Como regra, a autoridade competente para assi...",As autoridades competentes para assinatura dos...,[De acordo com o Art. 9º da [Deliberação CONSU...,0.345204,1.0,1.0,1.0
7,Existe uma tramitação simplificada para aprova...,"[Sim, o art. 7º da Deliberação CONSU-A-016/202...",Sim,"[Sim, existem tramitações simplificadas para a...",0.34829,1.0,0.636364,1.0
8,Quando se deve utilizar um Termo Aditivo a um ...,[Em razão da inexistência de uma única lei ou ...,Quando o objetivo é alterar os termos do convê...,"[De acordo com [nan2], um Termo Aditivo deve s...",0.696983,1.0,0.333333,1.0
9,É possível que o convênio preveja o pagamento ...,"[Sim, a concessão de bolsas estímulo à inovaçã...",Sim,"[Sim, é possível que o convênio preveja o paga...",0.131411,1.0,1.0,1.0


In [10]:
# Mean of each ragas metric over all rows of `df`, printed one value per line
# in the order: answer_relevancy, faithfulness, context_recall, context_precision.
for metric_name in ('answer_relevancy', 'faithfulness', 'context_recall', 'context_precision'):
    print(df[metric_name].mean())

0.38218471354330974
0.905
0.6321590909090908
0.7749999999225
