## Import Libraries and setup

In [290]:
import sklearn
import pandas as pd
import numpy as np

## Select A Model Result to Evaluate
- You can choose from these model results:
    - gemma_3_NOCONTEXT
    - gemma_3_CONTEXT
    - gemma_3_LANGUAGE_TEACHER
    - gemma_3_RULES
    - helsinki_AUG
    - helsinky_NOAUG
    - Llama8b
    - Flan_t5_XL
- If you want to save the evaluation to JSON, set `save_as_json = True`.

In [None]:

# Name of the model result to evaluate; must be one of the names listed above.
MODEL = "Flan_t5_XL"

# Set to True to save the evaluation as JSON, per the notes above.
save_as_json = True

In [292]:
# Maps every selectable MODEL name to the base name (without extension) of
# the CSV file that holds that model's translation results.
RESULT_FILES = {
    # Gemma family
    "gemma_3_NOCONTEXT": "gemma_3_1b_nocontext_res",
    "gemma_3_CONTEXT": "gemma_3_1b_context_res",
    "gemma_3_LANGUAGE_TEACHER": "gemma_3_1b_language_teacher_res",
    "gemma_3_RULES": "gemma_3_1b_with_rules_res",
    # Helsinki family
    "helsinki_AUG": "Helsinki_res",
    "helsinky_NOAUG": "Helsinki_noaug_res",
    # Llama
    "Llama8b": "LLama8b_res",
    # Flan-T5
    "Flan_t5_XL": "flan_t5_XL_res",
}

# Fail fast on an unknown model name. Previously the fallback branch left
# file_name empty, so the notebook went on to look for ".csv" and failed
# later with a confusing file-not-found error.
if MODEL not in RESULT_FILES:
    raise ValueError(
        f"Unknown MODEL {MODEL!r}; choose one of: {', '.join(RESULT_FILES)}"
    )

file_name = RESULT_FILES[MODEL]
result_path = f"./model_translation_result/{file_name}.csv"

## Loading Results

In [293]:
# Load the selected model's results; the CSV uses ';' as its field separator.
df = pd.read_csv(result_path, delimiter=";")

In [294]:
# Pull the three score columns out of the results frame as NumPy arrays.
gpt_score, user_score, prometheus_score = (
    df[column].to_numpy()
    for column in ("GPT_score", "user_score", "prometheus_score")
)

print(gpt_score)

[3 5 1 5 4 5 1 5 3 1]


## Correlation between scores

In [295]:
# Report the arithmetic mean of each score array, one aligned line per scorer.
print("--------Compute Mean for each score--------")

for label, scores in (
    ("GPT score", gpt_score),
    ("Prometheus score", prometheus_score),
    ("User score", user_score),
):
    # Left-pad labels to the longest one (16 chars) so the arrows line up.
    print(f"{label:<16} -> {np.mean(scores)}")


--------Compute Mean for each score--------
GPT score        -> 3.3
Prometheus score -> 2.7
User score       -> 3.0


1. GPT score correlation

In [296]:
from scipy.stats import pearsonr, spearmanr, kendalltau


# Correlation between the user scores and the GPT scores.
# Each scipy call returns a (statistic, p-value) pair; only the statistic is kept.
pearson_corr = pearsonr(user_score, gpt_score)[0]
spearman_corr = spearmanr(user_score, gpt_score)[0]
kendall_corr = kendalltau(user_score, gpt_score)[0]

for label, value in (
    ("Pearson:", pearson_corr),
    ("Spearman:", spearman_corr),
    ("Kendall:", kendall_corr),
):
    print(f"{label:<9} {value:.2f}")

# NOTE(review): despite its name, this dict holds the user-vs-GPT
# correlations computed just above.
Prometheus_score = dict(
    Pearson=pearson_corr,
    spearman_corr=spearman_corr,
    kendall_corr=kendall_corr,
)

Pearson:  0.92
Spearman: 0.92
Kendall:  0.88


2. Prometheus score correlation

In [297]:
from scipy.stats import pearsonr, spearmanr, kendalltau


# Correlation between the user scores and the Prometheus scores.
# Each scipy call returns a (statistic, p-value) pair; only the statistic is kept.
pearson_corr = pearsonr(user_score, prometheus_score)[0]
spearman_corr = spearmanr(user_score, prometheus_score)[0]
kendall_corr = kendalltau(user_score, prometheus_score)[0]

for label, value in (
    ("Pearson:", pearson_corr),
    ("Spearman:", spearman_corr),
    ("Kendall:", kendall_corr),
):
    print(f"{label:<9} {value:.2f}")

# NOTE(review): despite its name, this dict holds the user-vs-Prometheus
# correlations computed just above.
GPT_scores = dict(
    Pearson=pearson_corr,
    spearman_corr=spearman_corr,
    kendall_corr=kendall_corr,
)

Pearson:  0.58
Spearman: 0.58
Kendall:  0.45


## Save Json

In [298]:
import json
import os
# Build the JSON payload: the per-sample rows followed by the two
# correlation summaries.
#
# The correlation dicts are cross-named where they are created:
# `Prometheus_score` is assigned in the GPT-correlation cell and `GPT_scores`
# in the Prometheus-correlation cell. They are therefore cross-wired here so
# that each set of correlations is saved under the label of the scorer it
# actually describes.
output = [
    df.to_dict(orient="records"),
    {"GPT_score": Prometheus_score},   # user-vs-GPT correlations
    {"Prometheus_score": GPT_scores},  # user-vs-Prometheus correlations
]

path = f"./model_translation_result_json/{file_name}.json"

# Honor the `save_as_json` switch from the configuration cell instead of
# always writing the file.
if save_as_json:
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Write to file.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2, ensure_ascii=False)