In [None]:
from sklearn.metrics import f1_score, classification_report, accuracy_score
import pathlib
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np
import pandas as pd
import os
import re

# Parent of the current working directory; the result folders used by the
# cells below are located relative to this path.
working_dir = pathlib.Path.cwd().resolve()
current_path = working_dir.parent
print(current_path)

def find_pt_file(folder_path):
    """Return the integer suffix encoded in the name of a ``.pt`` file.

    The text before the first dot of the file name is split on underscores
    and its last piece is parsed as an integer (``best_model_7.pt`` -> ``7``).

    Args:
        folder_path: Directory to search (``str`` or path-like).

    Returns:
        int: The trailing number of the selected ``.pt`` file name.

    Raises:
        FileNotFoundError: If the folder contains no ``.pt`` file.
        ValueError: If the trailing piece of the file name is not an integer.
    """
    # os.listdir returns entries in arbitrary order; sort so the selection is
    # reproducible when more than one .pt file is present.
    pt_filenames = sorted(f for f in os.listdir(folder_path) if f.endswith('.pt'))
    if not pt_filenames:
        raise FileNotFoundError(f"No .pt file found in {folder_path}")

    pt_filename = pt_filenames[0]
    return int(pt_filename.split(".")[0].split('_')[-1])
    

## Binary

In [None]:
# Aggregate the binary-task test results over all runs and print one LaTeX
# table row: per-class F1, macro F1 and accuracy, each as mean$\pm$std.
model = 'distilbert'
corpus = 'ibm'
task = 'binary'

results_path = current_path / 'results' / f"{corpus}-{model}-{task}"

# Number of test_results_exp_<i>.csv files to aggregate.
n_experiments = 10

# Display names handed to classification_report as target_names.
# NOTE(review): assumes the sorted label values in the CSVs line up with this
# order — confirm against the training label encoding.
names_list = ['No-PM', 'PM']

macro_f1_scores = []
accuracy_scores = []
scores_per_category = {x: [] for x in names_list}

# Index parsed from the .pt file name in results_path; the run with this index
# gets its full classification report printed.
best_iter = find_pt_file(results_path)

for exp in range(n_experiments):
    df = pd.read_csv(results_path / f'test_results_exp_{exp}.csv')
    true_labels = df['true_label']
    predictions = df['prediction']

    classes_scores = classification_report(
        true_labels, predictions, target_names=names_list, output_dict=True
    )

    for cl in names_list:
        scores_per_category[cl].append(classes_scores[cl]['f1-score'])

    macro_f1 = f1_score(true_labels, predictions, average='macro')
    macro_f1_scores.append(macro_f1)

    accuracy = accuracy_score(true_labels, predictions)
    accuracy_scores.append(accuracy)

    if exp == best_iter:
        print(exp)
        print(classification_report(true_labels, predictions, target_names=names_list))


# Column order of the output row: PM, No-PM, macro F1, accuracy.
# ':.3f' keeps a fixed number of decimals (round() would print 0.9 rather than
# 0.900), and '\\pm' avoids the invalid '\p' escape sequence.
values = []
for cl in ['PM', 'No-PM']:
    f1s = scores_per_category[cl]
    values.append(f"{np.mean(f1s):.3f}$\\pm${np.std(f1s):.3f}")

values.append(f"{np.mean(macro_f1_scores):.3f}$\\pm${np.std(macro_f1_scores):.3f}")
values.append(f"{np.mean(accuracy_scores):.3f}$\\pm${np.std(accuracy_scores):.3f}")

output_string = " & ".join(values)
print(output_string)

# Second variant without leading zeros: "0.912" -> ".912".
output_string = re.sub(r'(?<!\d)0\.', '.', output_string)
print(output_string)


## Multi-class


In [None]:
# Aggregate the multi-class test results over all runs and print one LaTeX
# table row: per-class F1, macro F1 and accuracy, each as mean$\pm$std.
model = 'bert'
corpus = 'ukp'
task = 'multi'
results_path = current_path / 'results' / f"{corpus}-{model}-{task}"

# Index parsed from the .pt file name in results_path; the run with this index
# gets its full classification report printed.
best_iter = find_pt_file(results_path)
print(best_iter)

# Number of test_results_exp_<i>.csv files to aggregate.
n_experiments = 10

# Display names handed to classification_report as target_names.
# NOTE(review): assumes the sorted label values in the CSVs line up with this
# order — confirm against the training label encoding.
names_list = ['correct', 'flipped', 'neutralized', 'polarized']

scores_per_category = {x: [] for x in names_list}

macro_f1_scores = []
accuracy_scores = []

for exp in range(n_experiments):
    df = pd.read_csv(results_path / f'test_results_exp_{exp}.csv')
    true_labels = df['true_label']
    predictions = df['prediction']
    classes_scores = classification_report(
        true_labels, predictions, target_names=names_list, output_dict=True
    )

    for cl in names_list:
        scores_per_category[cl].append(classes_scores[cl]['f1-score'])

    macro_f1 = f1_score(true_labels, predictions, average='macro')
    macro_f1_scores.append(macro_f1)

    accuracy = accuracy_score(true_labels, predictions)
    accuracy_scores.append(accuracy)

    if exp == best_iter:
        print(classification_report(true_labels, predictions, target_names=names_list))

# Column order of the output row: correct, neutralized, polarized, flipped,
# macro F1, accuracy (deliberately different from names_list order).
# ':.3f' keeps a fixed number of decimals (round() would print 0.9 rather than
# 0.900), and '\\pm' avoids the invalid '\p' escape sequence.
values = []
for cl in ['correct', 'neutralized', 'polarized', 'flipped']:
    f1s = scores_per_category[cl]
    values.append(f"{np.mean(f1s):.3f}$\\pm${np.std(f1s):.3f}")

values.append(f"{np.mean(macro_f1_scores):.3f}$\\pm${np.std(macro_f1_scores):.3f}")
values.append(f"{np.mean(accuracy_scores):.3f}$\\pm${np.std(accuracy_scores):.3f}")

output_string = " & ".join(values)
print(output_string)

# Second variant without leading zeros: "0.912" -> ".912".
output_string = re.sub(r'(?<!\d)0\.', '.', output_string)
print(output_string)