In [1]:
from pathlib import Path
from sklearn.metrics import roc_auc_score
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Glob pattern, resolved against the current working directory by
# load_results(), that matches one entry per execution to summarise.
# (A `global` statement is a no-op at module level, so none is needed here.)
PATH = "results/*"

def plot_auc_result(data, category, execution, method):
    """Reduce the best ROC AUC of every k-fold split to a single value.

    Despite its name this function draws nothing: for each fold it scores
    every record file listed under ``category``, keeps the highest ROC AUC,
    and finally reduces the per-fold maxima with ``method``.

    Parameters
    ----------
    data : dict
        Maps a fold directory name to a dict whose ``category`` entry is a
        list of record-file paths relative to that fold directory.
    category : str
        Key selecting which list of record files to score in each fold.
    execution : str or pathlib.Path
        Directory holding the fold directories. It is joined onto the
        current working directory, so an absolute path is used unchanged.
    method : callable
        Reduction applied to the list of per-fold best AUCs
        (for example ``np.mean``).

    Returns
    -------
    object
        Whatever ``method`` returns for the list of per-fold best AUCs.

    Raises
    ------
    IndexError
        If a fold lists no record files for ``category``. ``load_results``
        relies on this exception type to flag executions without data.
    """
    root = Path.cwd() / execution
    best_aucs = []
    for kfold, results in data.items():
        records = results[category]
        if not records:
            # Keep the historical exception type: callers catch IndexError.
            raise IndexError(f"no {category!r} records for fold {kfold!r}")

        fold_aucs = []
        for record in records:
            # Each record file must provide "actual" and "pred" columns.
            df = pd.read_csv(root / kfold / record)
            fold_aucs.append(roc_auc_score(df["actual"], df["pred"]))

        # Only the best score of the fold matters; the epoch it came from
        # is irrelevant, so the epoch directory name is never parsed.
        best_aucs.append(max(fold_aucs))
    return method(best_aucs)

def load_results(method):
    """Build a table of aggregated best-epoch ROC AUCs, one row per execution.

    Every entry matched by the module-level ``PATH`` glob (resolved against
    the current working directory) is treated as one execution, apart from
    a fixed set of excluded directories. For each execution the
    ``kfold-*`` sub-directories are scanned for
    ``Epoch */<category>-records.csv`` files, and ``plot_auc_result`` turns
    them into one value per category.

    Parameters
    ----------
    method : callable
        Reduction over the per-fold best AUCs, forwarded unchanged to
        ``plot_auc_result`` (for example ``np.mean``).

    Returns
    -------
    pandas.DataFrame
        Indexed by the execution path relative to ``results/`` and sorted
        by that index, with one column per category (``"train"`` and
        ``"test"``). Executions without usable records hold ``None``.

    Raises
    ------
    ValueError
        If ``PATH`` matches an entry that is not located under ``results/``
        (raised by ``Path.relative_to``).
    """
    cwd = Path.cwd()
    results_root = cwd / "results"

    # Entries under results/ that are left out of the summary.
    excluded = {
        results_root / "complete",
        results_root / "l2-normed",
        results_root / "l2-not-normed",
    }
    executions = set(cwd.glob(PATH)) - excluded

    # Record categories to summarise; "eval" records are not loaded.
    categories = ("train", "test")

    summary = {}
    # sorted() gives the progress bar a stable, reproducible order.
    for execution in tqdm(sorted(executions)):
        # fold name -> {category: [record paths relative to the fold]}
        data = {
            fold.name: {
                category: [
                    record.relative_to(fold)
                    for record in fold.glob(f"Epoch */{category}-records.csv")
                ]
                for category in categories
            }
            for fold in execution.glob("kfold-*")
        }

        name = str(execution.relative_to(results_root))
        # None marks "no data" for a category.
        row = dict.fromkeys(categories)
        if data:
            # Without any fold there is nothing to aggregate, so the reducer
            # is only called when at least one fold exists (an empty list
            # would yield NaN for np.mean or an error for e.g. np.max).
            try:
                row = {
                    category: plot_auc_result(data, category, execution, method)
                    for category in categories
                }
            except IndexError:
                # A fold had no record files for some category: keep the
                # whole execution flagged as "no data".
                row = dict.fromkeys(categories)
        summary[name] = row

    return pd.DataFrame(summary).T.sort_index()


In [2]:
# Average the best per-fold AUCs of each execution, then order the table by
# ascending test AUC so the strongest configurations appear last.
auc_summary = load_results(method=np.mean)
df = auc_summary.sort_values(by="test")
df

  0%|          | 0/14 [00:00<?, ?it/s]

Unnamed: 0,train,test
sceptr-default-johnson-lindenstarauss,0.710996,0.740367
sceptr-tiny-autoencoder,0.830835,0.823886
aaprop-autoencoder,0.838555,0.829035
sceptr-default-autoencoder,0.81799,0.840758
atchley-no-reduction,0.729025,0.845989
kidera-autoencoder,0.805253,0.871178
kidera-johnson-lindenstarauss,0.847103,0.892716
sceptr-tiny-johnson-lindenstarauss,0.869179,0.903805
aaprop-johnson-lindenstarauss,0.906706,0.926224
aaprop-no-reduction,0.841586,0.927883
