# This notebook evaluates the performance of RAG based on the given outputs

In [1]:
# Library imports
import os

import pandas as pd
import numpy as np
from rouge_score import rouge_scorer, scoring

In [2]:
# Global variables
MODELS = "Qwen/Qwen3-Embedding-8B_Qwen3-8B-Q4_K_M-think"
RESPONSES_PATH = '../evaluation/responses'

In [3]:
def calculate_rouge(predicted, reference):
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL", "rougeLsum"], use_stemmer=True)
    aggregator = scoring.BootstrapAggregator()

    for pred, ref in zip(predicted, reference):
        score = scorer.score(ref, pred)
        aggregator.add_scores(score)
    result = aggregator.aggregate()

    return {metric: result[metric].mid.fmeasure for metric in result}

In [4]:
def calculate_rouge_by_file(predicted, reference, models):
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL", "rougeLsum"], use_stemmer=True)
    aggregator = scoring.BootstrapAggregator()

    files = predicted['file'].unique()
    metrics = {
        'rouge1': np.zeros((len(files),), dtype=np.float32),
        'rouge2': np.zeros((len(files),), dtype=np.float32),
        'rougeL': np.zeros((len(files),), dtype=np.float32),
        'rougeLsum': np.zeros((len(files),), dtype=np.float32),
    }
    for f_idx, f in enumerate(files):
        file_predicted = predicted[predicted['file'] == f]
        file_reference = reference[reference['file'] == f]

        for pred, ref in zip(file_predicted['text'], file_reference['text']):
            single_score = scorer.score(ref, pred)
            aggregator.add_scores(single_score)
        result = aggregator.aggregate()
        score = {metric: result[metric].mid.fmeasure for metric in result}

        metrics['rouge1'][f_idx] = score['rouge1']
        metrics['rouge2'][f_idx] = score['rouge2']
        metrics['rougeL'][f_idx] = score['rougeL']
        metrics['rougeLsum'][f_idx] = score['rougeLsum']

    return pd.DataFrame({
            f'{models} rouge1': metrics['rouge1'],
            f'{models} rouge2': metrics['rouge2'],
            f'{models} rougeL': metrics['rougeL'],
            f'{models} rougeLsum': metrics['rougeLsum']
        },
        index=files)

In [5]:
# Loading the data
context = pd.read_csv(os.path.join(RESPONSES_PATH, MODELS, 'context.csv'), sep=',', index_col=0)
predicted = pd.read_csv(os.path.join(RESPONSES_PATH, MODELS, 'predicted.csv'), sep=',', index_col=0)
reference = pd.read_csv(os.path.join(RESPONSES_PATH, MODELS, 'reference.csv'), sep=',', index_col=0)

In [6]:
new_text = []
for text in predicted['text']:
    index = text.find('</think>\n\n')
    if index >= 0:
        new_text.append(text[index+10:])
    else:
        new_text.append(text)
#predicted['text'] = list(map(lambda item: item[1]['text'][item[1]['text'].find('</think>\n\n')+10:], predicted.iterrows()))
predicted['text'] = new_text

In [7]:
predicted['text']

0       La Ley Orgánica de la Universidad de Guanajuat...
1       El fundamento legal que sustenta la **Ley Orgá...
2       Sí, la Universidad de Guanajuato es autónoma. ...
3       Los fines principales de la Universidad de Gua...
4       Los principios que rigen a la Universidad de G...
                              ...                        
1824    El artículo séptimo del **Reglamento de Respon...
1825    La información proporcionada en los documentos...
1826    El Acuerdo de reforma al **Reglamento de Respo...
1827    Según el **Artículo Tercero (Ultractividad)** ...
1828    Las instancias universitarias competentes debe...
Name: text, Length: 1828, dtype: object

In [8]:
rouge_score = calculate_rouge(
    [pred['text'] for _, pred in predicted.iterrows()],
    [ref['text'] for _, ref in reference.iterrows()]
)

rouge = pd.DataFrame(rouge_score, index=[MODELS])
rouge

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
Qwen/Qwen3-Embedding-8B_Qwen3-8B-Q4_K_M-think,0.234747,0.178373,0.215965,0.222188


In [9]:
rouge_by_file= calculate_rouge_by_file(predicted, reference, MODELS)
rouge_by_file

Unnamed: 0,Qwen/Qwen3-Embedding-8B_Qwen3-8B-Q4_K_M-think rouge1,Qwen/Qwen3-Embedding-8B_Qwen3-8B-Q4_K_M-think rouge2,Qwen/Qwen3-Embedding-8B_Qwen3-8B-Q4_K_M-think rougeL,Qwen/Qwen3-Embedding-8B_Qwen3-8B-Q4_K_M-think rougeLsum
ley-organica-de-la-universidad-de-guanajuato,0.1847,0.138469,0.171591,0.175655
reglamento-del-personal-academico-de-la-universidad-de-guanajuato,0.284517,0.218602,0.256543,0.266521
reglamento-de-distinciones-universitarias-de-la-universidad-de-guanajuato,0.310765,0.241885,0.281501,0.291481
reglamento-de-la-junta-directiva-de-la-universidad-de-guanajuato,0.306633,0.23833,0.277515,0.287827
reglamento-para-la-incorporacion-al-regimen-academico-de-la-universidad-de-guanajuato,0.281312,0.218214,0.256964,0.264687
reglamento-interno-del-patronato-de-la-universidad-de-guanajuato,0.272187,0.210942,0.24908,0.256047
reglamento-de-quienes-integran-la-orquesta-sinfonica-de-la-universidad-de-guanajuato_2021-2023,0.271232,0.209046,0.247863,0.254758
reglamento-academico-de-la-universidad-de-guanajuato,0.264448,0.205576,0.242373,0.249085
codigo-de-etica-de-las-personas-servidoras-publicas-universidad-de-gunajuato,0.260411,0.202285,0.238509,0.244969
reglamento-de-la-defensoria-de_los-derechos-humanos-en-el-entorno-universitario-de-la-universidad-de-guanajuato,0.265653,0.205322,0.243184,0.249577
