In [2]:
import pandas as pd
import os
from glob import glob
from collections import defaultdict
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import re

In [22]:
import os
import re
import pandas as pd

# Funzione per aprire i file in una directory e ordinarli
def open_files(directory):
    files = os.listdir(directory)
    files.sort()
    return files

# Funzione per estrarre la figura retorica dalla stringa
def extract_figure(text):
    match = re.search(r"that's why is an example of\s+(.*)", str(text), re.IGNORECASE)
    if match:
        figure = match.group(1).strip().rstrip(".")
        figure = figure.upper()  # converto in maiuscolo
        return figure
    return ""

In [26]:
label_rename_map = {
    'IM:FALSE ASSERTION': 'FALSE ASSERTION',
    'EX:CONTEXT SHIFT': 'CONTEXT SHIFT',
    'EX:OXYMORON PARADOX': 'OXYMORON',
    'OXYMORON PARADOX': 'OXYMORON',
}

files = open_files('models_generations_raw')

models = {}
for file in files:
    # estrai il nome modello rimuovendo pattern specifici
    model = re.sub(r"^(fine-tuned-)?|-decoding-\d+\.csv$", "", file)
    print(f"Processing model: {model}")

    model_generations = pd.read_csv(os.path.join('models_generations_raw', file))

    # applica extract_figure a ogni predizione
    y_pred = model_generations['prediction'].apply(extract_figure)

    # rinomina le label secondo la mappa
    y_pred = y_pred.apply(lambda x: label_rename_map.get(x, x))

    # crea DataFrame con post_id, reply_id e prediction
    df = pd.DataFrame({
        'post_id': model_generations['post_id'],
        'reply_id': model_generations['reply_id'],
        'prediction': y_pred
    })

    models[model] = df

# Ora models contiene un DataFrame per ogni modello
# Per esempio per accedere a uno:
models['LLaMAntino-3-ANITA-8B-Inst-DPO-ITA'].head()


Processing model: LLaMAntino-3-ANITA-8B-Inst-DPO-ITA
Processing model: Llama-3.1-8B-Instruct-decoding-1 .csv
Processing model: Minerva-7B-instruct-v1.0
Processing model: Ministral-8B-Instruct-2410-decoding-1 .csv
Processing model: Qwen2.5-7B-Instruct


Unnamed: 0,post_id,reply_id,prediction
0,7961696982295378594,15902869978243358740,EUPHEMISM
1,4824587311292975614,4652172244294165548,RHETORICAL QUESTION
2,7155693771356046855,18161905213580884688,ANALOGY
3,5929177359346716605,5855671312721211658,EX:CONTEXTUALIZATION ERROR
4,12099038560420169473,10284801989587274790,HYPERBOLE


In [27]:
# crea un file per ogni df in models
for model, df in models.items():
    output_file = f"models_generations/{model}_predictions.csv"
    df.to_csv(output_file, index=False)
    print(f"Saved predictions for {model} to {output_file}")

Saved predictions for LLaMAntino-3-ANITA-8B-Inst-DPO-ITA to models_generations/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_predictions.csv
Saved predictions for Llama-3.1-8B-Instruct-decoding-1 .csv to models_generations/Llama-3.1-8B-Instruct-decoding-1 .csv_predictions.csv
Saved predictions for Minerva-7B-instruct-v1.0 to models_generations/Minerva-7B-instruct-v1.0_predictions.csv
Saved predictions for Ministral-8B-Instruct-2410-decoding-1 .csv to models_generations/Ministral-8B-Instruct-2410-decoding-1 .csv_predictions.csv
Saved predictions for Qwen2.5-7B-Instruct to models_generations/Qwen2.5-7B-Instruct_predictions.csv


In [5]:

def round_sig(x, sig=3):
    if isinstance(x, (float, int)):
        return float(f"{x:.{sig}g}")
    return x

def format_report(report_dict):
    rounded = {}
    for label, metrics in report_dict.items():
        if isinstance(metrics, dict):
            rounded[label] = {k: round_sig(v) for k, v in metrics.items()}
        else:
            rounded[label] = round_sig(metrics)
    return rounded

def print_formatted_report(report_dict):
    print("\n📈 Report di classificazione (precision, recall, f1-score, support):")
    labels = [label for label in report_dict if label not in ('accuracy', 'macro avg', 'weighted avg')]
    header = f"{'Label':<20} {'Prec':>8} {'Rec':>8} {'F1':>8} {'Support':>8}"
    print(header)
    print("-" * len(header))
    for label in labels + ['macro avg', 'weighted avg']:
        row = report_dict[label]
        print(f"{label:<20} {row['precision']:>8.3f} {row['recall']:>8.3f} {row['f1-score']:>8.3f} {row['support']:>8.0f}")
    print(f"{'Accuracy':<20} {'':>8} {'':>8} {'':>8} {report_dict['accuracy']:>8.3f}")

def calculate_metrics(file_path, print_confusion=False):
    df = pd.read_csv(file_path)

    df['extracted_prediction'] = df['prediction'].apply(extract_figure)
    df['rhetorical_figure'] = df['rhetorical_figure'].astype(str).str.lower().str.strip()

    if print_confusion:
        print(f"\n🧩 Matrice di confusione per {os.path.basename(file_path)}:")
        confusion = pd.crosstab(df['rhetorical_figure'], df['extracted_prediction'],
                                rownames=['Actual'], colnames=['Predicted'])
        print(confusion)

    report = classification_report(df['rhetorical_figure'], df['extracted_prediction'], output_dict=True, zero_division=0)
    return report

def average_reports(reports):
    avg_report = {}
    keys = reports[0].keys()

    for key in keys:
        if isinstance(reports[0][key], dict):
            avg_report[key] = {}
            for metric in reports[0][key]:
                values = [r[key].get(metric, 0.0) for r in reports if key in r]
                avg_report[key][metric] = np.mean(values)
        else:  # accuracy
            values = [r.get(key, 0.0) for r in reports]
            avg_report[key] = np.mean(values)

    return avg_report

def collect_model_accuracies(folder_path):
    decoding_files = glob(os.path.join(folder_path, "*-decoding-[1-3].csv"))
    model_groups = defaultdict(list)

    for file_path in decoding_files:
        filename = os.path.basename(file_path)
        model_prefix = filename.rsplit("-decoding-", 1)[0]
        model_groups[model_prefix].append(file_path)

    for model_prefix, file_list in model_groups.items():
        print(f"\n🔍 Modello: {model_prefix}")
        reports = []
        for file in sorted(file_list):
            report = calculate_metrics(file, print_confusion=False)
            reports.append(report)
        avg = average_reports(reports)
        print_formatted_report(avg)

# Directory corrente
folder_path = "."
collect_model_accuracies(folder_path)