In [1]:
# Imports

import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [5]:
# Load every pickled result file from the results directory into one long-format
# DataFrame (one row per recorded score set), then group the rows by dataset.

RESULTS_DIR = "results"
RESULT_COLUMNS = [
    "dataset",
    "random_seed",
    "auc_score",
    "accuracy_score",
    "ground_truth_auc_score",
    "ground_truth_accuracy_score",
]

# os.listdir returns entries in arbitrary order and also lists hidden files
# (e.g. .DS_Store) and sub-directories. Keep only regular, non-hidden files and
# sort them so the row order is reproducible. A name starting with "." has an
# empty stem under the parsing below, so it could never be a valid result file.
result_files = sorted(
    entry
    for entry in os.listdir(RESULTS_DIR)
    if not entry.startswith(".")
    and os.path.isfile(os.path.join(RESULTS_DIR, entry))
)

results = []
for result_file in result_files:
    # The file name is parsed as "<dataset>-<random_seed>.<extension>": everything
    # before the first "." is the stem, and the stem is split on its LAST hyphen
    # so that hyphens inside the dataset name are tolerated.
    stem = result_file.split(".")[0]
    dataset, separator, random_seed = stem.rpartition("-")
    if not separator:
        raise ValueError(
            f"Cannot parse '<dataset>-<random_seed>' from result file name {result_file!r}"
        )

    # NOTE(security): pickle.load can execute arbitrary code while unpickling;
    # only load result files that come from trusted runs.
    with open(os.path.join(RESULTS_DIR, result_file), "rb") as f:
        res = pickle.load(f)

    # The file is closed before the rows are built. zip stops at the shortest of
    # the four score sequences, so unequal lengths are truncated silently.
    for accuracy_score, auc_score, ground_truth_accuracy_score, ground_truth_auc_score in zip(
        res["accuracy_score"],
        res["auc_score"],
        res["ground_truth_accuracy_score"],
        res["ground_truth_auc_score"],
    ):
        # Value order must match RESULT_COLUMNS.
        results.append(
            [
                dataset,
                random_seed,  # kept as the string taken from the file name
                auc_score,
                accuracy_score,
                ground_truth_auc_score,
                ground_truth_accuracy_score,
            ]
        )

df = pd.DataFrame(data=results, columns=RESULT_COLUMNS)

# Maps the dataset identifier taken from the file name to a human-readable label.
name_dict = {
    "artificial_random": "Within search space",
    "artificial_random_noise": "Within search space with noise",
    "artificial_random_rich": "Beyond search space",
    "artificial_random_rich_noise": "Beyond search space with noise",
}

# Grouping by a one-element list is kept on purpose: the reporting cell indexes the
# group key (name[0]), and pandas >= 2.0 yields 1-tuple keys for a list grouper.
grouped = df.groupby(["dataset"])

In [6]:
# Print one LaTeX table row per dataset group: the mean and standard deviation of
# auc_score followed by the mean and standard deviation of ground_truth_auc_score.
for name, group in grouped:
    # Iterating a groupby built from a one-element list yields 1-tuple keys on
    # pandas >= 2.0 but plain scalar keys on older versions. There name[0] would be
    # the first character of the dataset name and the label lookup would raise
    # KeyError, so normalize the key before using it.
    dataset_key = name[0] if isinstance(name, tuple) else name

    mean_auc = group["auc_score"].mean()
    std_auc = group["auc_score"].std()  # NaN when the group has a single row
    ground_truth_mean_auc = group["ground_truth_auc_score"].mean()
    ground_truth_std_auc = group["ground_truth_auc_score"].std()
    print(f"{name_dict[dataset_key]}&{mean_auc:.4f} ($\\pm{std_auc:.2f}$) & {ground_truth_mean_auc:.4f} ($\\pm{ground_truth_std_auc:.2f}$)\\\\")

Beyond search space with noise&0.9627 ($\pm0.01$) & 0.9547 ($\pm0.02$)\\
