In [39]:
import os

# os.listdir() returns entries in arbitrary order; sort them so the order in
# which result files are processed (and therefore the column order of the
# exported table) is reproducible across runs and machines.
configs = sorted(os.listdir("./results/"))

In [50]:
import random
import numpy as np
import pickle as pkl
from collections import defaultdict
from sklearn.metrics import (
    precision_recall_curve,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score
)


def compute(labels, preds):
    """Compute the threshold-dependent classification metrics.

    Args:
        labels: Ground-truth labels, passed unchanged to each sklearn scorer.
        preds: Predicted labels, passed unchanged to each sklearn scorer.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1_score'`` (in that order), each holding the value returned by
        the corresponding ``sklearn.metrics`` function.
    """
    scorers = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
    return {name: scorer(labels, preds) for name, scorer in scorers}

def findMean(data):
    """Summarise every list of scores as a ``"mean ± std"`` string.

    Args:
        data: Mapping from a metric name to a sequence of numeric scores.

    Returns:
        dict with the same keys, in the same order. Each value is the mean
        and the standard deviation of the scores (computed in float32 and
        rounded to 3 decimals), joined by ``" ± "``.
    """
    summary = {}
    for metric, scores in data.items():
        samples = np.array(scores).astype(np.float32)
        center = np.round(samples.mean(), 3)
        spread = np.round(samples.std(), 3)
        # The text is built from str() of the NumPy scalars.
        summary[metric] = u" \u00B1 ".join([str(center), str(spread)])
    return summary

def evaluate(data, n_seeds=10, val_fraction=0.1):
    """Score one set of predictions over repeated random validation/test splits.

    For every seed the (label, prediction) pairs are shuffled, the first
    ``val_fraction`` of them is used to pick the decision threshold with the
    highest F1 on the precision-recall curve, and the remaining pairs are
    scored at that threshold. ROC-AUC is computed on the raw test scores.

    Args:
        data: Mapping with a ``'label'`` and a ``'prediction'`` sequence that
            are paired positionally. Presumably binary labels and continuous
            scores, as the sklearn metrics used here expect -- confirm
            against the code that writes the result files.
        n_seeds: Number of shuffles; seeds ``0 .. n_seeds - 1`` are used.
        val_fraction: Fraction of the pairs used for threshold selection.

    Returns:
        dict mapping ``'accuracy'``, ``'precision'``, ``'recall'``,
        ``'f1_score'`` and ``'auc_roc_score'`` to the ``"mean ± std"``
        strings produced by ``findMean`` over all seeds.

    Raises:
        ValueError: If the validation or the test split would be empty.
    """
    pairs = list(zip(data['label'], data['prediction']))

    val_size = int(val_fraction * len(pairs))
    if val_size == 0 or val_size >= len(pairs):
        raise ValueError(
            f"cannot split {len(pairs)} samples with val_fraction={val_fraction}: "
            "validation and test splits must both be non-empty"
        )

    cresult = defaultdict(list)

    for seed in range(n_seeds):
        shuffled = list(pairs)
        # A private generator yields the same permutation as
        # random.seed(seed); random.shuffle(...), but does not reset the
        # state of the global `random` module as a side effect.
        random.Random(seed).shuffle(shuffled)

        vlabels, vpreds = zip(*shuffled[:val_size])
        tlabels, tpreds = zip(*shuffled[val_size:])

        precision, recall, threshold = precision_recall_curve(vlabels, vpreds)
        f1_scores = 2 * recall * precision / (recall + precision + 1e-8)
        # precision/recall carry one trailing point that has no threshold;
        # only points that map to a real threshold are candidates.
        cutoff = threshold[np.argmax(f1_scores[:-1])]

        # precision_recall_curve evaluates point i for `score >= threshold[i]`,
        # so the test scores must be cut with `>=` to land on the operating
        # point that was selected on the validation split.
        rpreds = (np.asarray(tpreds) >= cutoff).astype(float)
        _result = compute(tlabels, rpreds)
        _result['auc_roc_score'] = roc_auc_score(tlabels, tpreds)

        for key, value in _result.items():
            cresult[key].append(value)

    return findMean(cresult)



In [51]:
# presults[config_name][domain] -> {metric name: "mean ± std"} from evaluate().
presults = defaultdict(lambda: defaultdict(dict))

for config in configs:
    # NOTE: unpickling can execute arbitrary code; only load result files
    # that were produced by a trusted run.
    # The context manager closes the file handle even if loading fails.
    with open(f"./results/{config}", "rb") as results_file:
        data = pkl.load(results_file)
    # Drop the file extension, whatever its length, to get the config name.
    config_name = os.path.splitext(str(config))[0]

    for domain in data.keys():
        presults[config_name][domain] = evaluate(data[domain])


In [54]:
# Expose the aggregated results under the name `data`, which the CSV export
# code reads.
data = presults

In [59]:
import csv
csv_rows = []

# Prepare headers: the first header row names each config once, the second
# repeats the metric names under every config.
headers_main = ["Category"]
headers_sub = [""]
metrics = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc_score']

for individual in data.keys():
    headers_main.append(individual)
    # Pad with blanks so the next config name lines up with its first metric.
    headers_main.extend([""] * (len(metrics) - 1))
    headers_sub.extend(metrics)

csv_rows.append(headers_main)
csv_rows.append(headers_sub)

# Prepare rows by category. The row set is taken from 'individual_20' when it
# is present, otherwise from the first config. The membership test avoids
# indexing a defaultdict with a missing key, which would silently create an
# empty entry and produce a header-only file.
if 'individual_20' in data:
    reference_config = 'individual_20'
else:
    reference_config = next(iter(data), None)
categories = list(data[reference_config].keys()) if reference_config is not None else []

for category in categories:
    row = [category]
    for individual in data.keys():
        # .get() leaves the cell blank when a config has no result for this
        # category, instead of raising KeyError (or inserting an empty dict
        # into a defaultdict).
        category_result = data[individual].get(category, {})
        for metric in metrics:
            row.append(category_result.get(metric, ""))
    csv_rows.append(row)

# Write to CSV. The cells contain the non-ASCII "±" sign, so the encoding is
# set explicitly rather than left to the platform default.
with open("output.csv", "w", newline='', encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(csv_rows)
