In [1]:
import pandas as pd
import os
import re

In [2]:
# List the entries of a directory in sorted order
def open_files(directory):
    """Return the names of the entries in ``directory``, sorted ascending.

    Despite the name, nothing is opened: this only lists the directory.

    Args:
        directory: Path of the directory to list.

    Returns:
        list: Entry names as produced by ``os.listdir`` (bare names, without
        the directory prefix), in default sort order.
    """
    return sorted(os.listdir(directory))

# Extract the rhetorical-figure label from a generated sentence
def extract_figure(text):
    """Return the label that follows "that's why is an example of" in ``text``.

    The phrase is matched case-insensitively. Everything after it, up to the
    next line break, is taken as the label; trailing periods and whitespace
    are removed and the result is upper-cased.

    Args:
        text: Value to inspect. It is converted with ``str()`` first, so
            non-string inputs (``None``, NaN, ...) are accepted and simply
            fail to match.

    Returns:
        str: The upper-cased label, or ``""`` when the phrase is not found.
    """
    match = re.search(r"that's why is an example of\s+(.*)", str(text), re.IGNORECASE)
    if not match:
        return ""
    # Remove the whole trailing run of periods/whitespace in one pass, so that
    # inputs such as "hyperbole ." do not keep a dangling space or period.
    figure = re.sub(r"[\s.]+\Z", "", match.group(1))
    return figure.strip().upper()

In [3]:
# Directory containing the raw generation files, one per model/decoding run
RAW_GENERATIONS_DIR = 'models_generations_raw/old_prompt'

# Maps label variants found in the predictions onto a single canonical label
label_rename_map = {
    'IM:FALSE ASSERTION': 'FALSE ASSERTION',
    'EX:CONTEXT SHIFT': 'CONTEXT SHIFT',
    'EX:OXYMORON PARADOX': 'OXYMORON',
    'OXYMORON PARADOX': 'OXYMORON',
}

files = open_files(RAW_GENERATIONS_DIR)

# model name -> DataFrame with columns post_id, reply_id, prediction
models = {}
for file in files:
    file_path = os.path.join(RAW_GENERATIONS_DIR, file)

    # Skip sub-directories and hidden entries (e.g. .ipynb_checkpoints,
    # .DS_Store): they are not generation files and would make read_csv fail.
    if file.startswith('.') or not os.path.isfile(file_path):
        continue

    # Derive the model name by dropping the optional "fine-tuned-" prefix
    # and the "-decoding-<n>.csv" suffix from the file name
    model = re.sub(r"^(fine-tuned-)?|-decoding-\d+\.csv$", "", file)
    print(f"Processing model: {model}")
    if model in models:
        # Two files reduced to the same name; the later one replaces the
        # earlier one, so make the overwrite visible instead of silent.
        print(f"Warning: duplicate model name {model!r} from {file}; overwriting previous entry")

    # NOTE(review): some IDs shown in the cell output exceed the int64 range;
    # confirm how pandas infers these columns if an ID is ever missing, or
    # read them with dtype=str.
    model_generations = pd.read_csv(file_path)

    # Apply extract_figure to every prediction
    y_pred = model_generations['prediction'].apply(extract_figure)

    # Rename labels according to the map; unknown labels pass through unchanged
    y_pred = y_pred.map(lambda label: label_rename_map.get(label, label))

    # Build a DataFrame with post_id, reply_id and the normalised prediction
    df = pd.DataFrame({
        'post_id': model_generations['post_id'],
        'reply_id': model_generations['reply_id'],
        'prediction': y_pred
    })

    models[model] = df

# models now holds one DataFrame per model.
# For example, to inspect one of them:
models['LLaMAntino-3-ANITA-8B-Inst-DPO-ITA'].head()


Processing model: LLaMAntino-3-ANITA-8B-Inst-DPO-ITA
Processing model: Llama-3.1-8B-Instruct
Processing model: Minerva-7B-instruct-v1.0
Processing model: Ministral-8B-Instruct-2410
Processing model: Qwen2.5-7B-Instruct


Unnamed: 0,post_id,reply_id,prediction
0,7961696982295378594,15902869978243358740,HYPERBOLE
1,4824587311292975614,4652172244294165548,OTHER
2,7155693771356046855,18161905213580884688,ANALOGY
3,5929177359346716605,5855671312721211658,OXYMORON
4,12099038560420169473,10284801989587274790,ANALOGY


In [4]:
# Write one predictions CSV per DataFrame in models
PREDICTIONS_DIR = "models_generations"

# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs(PREDICTIONS_DIR, exist_ok=True)

for model, df in models.items():
    output_file = f"{PREDICTIONS_DIR}/{model}_predictions.csv"
    # index=False: only post_id, reply_id and prediction are written
    df.to_csv(output_file, index=False)
    print(f"Saved predictions for {model} to {output_file}")

Saved predictions for LLaMAntino-3-ANITA-8B-Inst-DPO-ITA to models_generations/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_predictions.csv
Saved predictions for Llama-3.1-8B-Instruct to models_generations/Llama-3.1-8B-Instruct_predictions.csv
Saved predictions for Minerva-7B-instruct-v1.0 to models_generations/Minerva-7B-instruct-v1.0_predictions.csv
Saved predictions for Ministral-8B-Instruct-2410 to models_generations/Ministral-8B-Instruct-2410_predictions.csv
Saved predictions for Qwen2.5-7B-Instruct to models_generations/Qwen2.5-7B-Instruct_predictions.csv
