In [2]:
import os
import pandas as pd
from sklearn.metrics import (accuracy_score, balanced_accuracy_score, f1_score,
                             precision_score, recall_score, roc_auc_score)

In [3]:
files = [f for f in os.listdir('.') if f.endswith('.csv')]
files

['preds_master-testset-hard-0.csv',
 'preds_master-testset-hard-2.csv',
 'preds_master-testset-hard-3.csv',
 'preds_master-testset-hard-4.csv',
 'preds_master-testset.csv',
 'preds_master-testset-hard-5.csv',
 'preds_master-testset-hard-1.csv']

In [11]:
for file in files:
    df = pd.read_csv(file)
    preds = df['preds']
    labels = df['label']

    preds_bin = preds.apply(lambda x: 1 if x > 0.5 else 0)
    preds_bin = preds_bin.astype(int)

    acc = accuracy_score(labels, preds_bin)
    bal_acc = balanced_accuracy_score(labels, preds_bin)
    f1 = f1_score(labels, preds_bin)
    prec = precision_score(labels, preds_bin)
    rec = recall_score(labels, preds_bin)
    try:
        auc = roc_auc_score(labels, preds)
    except ValueError:
        auc = 'N/A'  # AUC cannot be computed if labels are not binary or if there's only one class

    print(f"Results for {file}:")
    print(f"Accuracy: {acc:.4f}")
    print(f"Balanced Accuracy: {bal_acc:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"AUC: {auc}")
    print("-" * 40)




Results for preds_master-testset-hard-0.csv:
Accuracy: 0.9643
Balanced Accuracy: 0.9643
F1 Score: 0.9818
Precision: 1.0000
Recall: 0.9643
AUC: N/A
----------------------------------------
Results for preds_master-testset-hard-2.csv:
Accuracy: 0.9936
Balanced Accuracy: 0.9936
F1 Score: 0.9968
Precision: 1.0000
Recall: 0.9936
AUC: N/A
----------------------------------------
Results for preds_master-testset-hard-3.csv:
Accuracy: 0.9957
Balanced Accuracy: 0.9957
F1 Score: 0.9978
Precision: 1.0000
Recall: 0.9957
AUC: N/A
----------------------------------------
Results for preds_master-testset-hard-4.csv:
Accuracy: 0.9959
Balanced Accuracy: 0.9959
F1 Score: 0.9979
Precision: 1.0000
Recall: 0.9959
AUC: N/A
----------------------------------------
Results for preds_master-testset.csv:
Accuracy: 0.8797
Balanced Accuracy: 0.8722
F1 Score: 0.8966
Precision: 0.8396
Recall: 0.9619
AUC: 0.9738618413397311
----------------------------------------
Results for preds_master-testset-hard-5.csv:
Accurac

