In [15]:
# Imports

import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt

In [16]:
classifiers = ["EDC", "AMAXSC", "M4GP", "lda",  "decision_tree", "MLP", "random_forest", "svm_rbf"]
score_metric = "auc_score"
score_metric_index = 0 if score_metric == "auc_score" else 1

result_files = os.listdir("results")
results = []
for result_file in result_files:
    dataset, search_strategy, optimiser, random_seed = result_file.split(
        "."
    )[0].split("-")
    with open(f"results/{result_file}", "rb") as f:
        res = pickle.load(f)
    for elapsed_time, auc_score, accuracy_score in zip(res["elapsed_time"], res["auc_score"], res["accuracy_score"]):
        results.append(
            [
                dataset,
                "EDC",  # "classifier
                search_strategy,
                optimiser,
                elapsed_time,
                auc_score,
                accuracy_score,
                random_seed,
            ]
        )

result_files = os.listdir("comparison_results")
for result_file in result_files:
    dataset, classifier, random_seed = result_file.split(
        "."
    )[0].split("-")
    with open(f"comparison_results/{result_file}", "rb") as f:
        res = pickle.load(f)
    for elapsed_time, auc_score, accuracy_score in zip(res["elapsed_time"], res["auc_score"], res["accuracy_score"]):
        results.append(
            [
                dataset,
                classifier,  # "classifier
                None,
                None,
                elapsed_time,
                auc_score,
                accuracy_score,
                random_seed,
            ]
        )


df = pd.DataFrame(
    columns=[
        "dataset",
        "classifier",
        "search_strategy",
        "optimiser",
        "elapsed_time",
        "auc_score",
        "accuracy_score",
        "random_seed"
    ],
    data=results,
)

In [17]:
print(df)

       dataset classifier search_strategy     optimiser  elapsed_time  \
0     BANKNOTE        EDC            beam  hill_climber      1.718301   
1     BANKNOTE        EDC            beam  hill_climber      1.867863   
2     BANKNOTE        EDC            beam  hill_climber      1.700837   
3     BANKNOTE        EDC            beam  hill_climber      1.801598   
4    HEPATITIS        EDC            beam  hill_climber     36.987851   
..         ...        ...             ...           ...           ...   
845  OCCUPANCY       M4GP            None          None  10925.042849   
846  OCCUPANCY       M4GP            None          None   7885.030924   
847  OCCUPANCY       M4GP            None          None   8350.017893   
848  OCCUPANCY       M4GP            None          None   9791.152997   
849      ADULT       M4GP            None          None  68018.334314   

     auc_score  accuracy_score random_seed  
0     1.000000        1.000000   1805819_2  
1     1.000000        1.000000   

In [18]:
dataset_grouped = df.groupby(["dataset"])
all_scores = []
all_stds = []
for dataset_group in dataset_grouped:
    scores = {}
    stds = {}
    dataset = dataset_group[0][0]
    if dataset == "BANANA" or (dataset.startswith("AD0")):
        continue
    for classifier_group in dataset_group[1].groupby(["classifier"]):
        number_of_folds_done = len(classifier_group[1])

        classifier = classifier_group[0][0]
        mean_auc = classifier_group[1]["auc_score"].mean()
        std_auc = classifier_group[1]["auc_score"].std()
        mean_acc = classifier_group[1]["accuracy_score"].mean()
        std_acc = classifier_group[1]["accuracy_score"].std()
        scores[classifier] = (mean_auc, mean_acc)
        stds[classifier] = (std_auc, std_acc)
        


    # Get the ranks
    for classifier in classifiers:
        if classifier not in scores:
            scores[classifier] = (0, 0)
            stds[classifier] = (0, 0)

    for classifier in classifiers:
        current_score = scores[classifier][score_metric_index]
        rank = 1
        for other_classifier in classifiers:
            if other_classifier == classifier:
                continue
            if scores[other_classifier][score_metric_index] > current_score:
                rank += 1
        scores[classifier] = (scores[classifier][0], scores[classifier][1], rank)
    

    print(f"{dataset}", end="&")
    if score_metric == "auc_score":
        # Print AUC scores
        for clf in classifiers:
            print(f"{scores[clf][0]:.4f} ($\\pm{stds[clf][0]:.2f}$)", end="&")

    if score_metric == "accuracy_score":
        # Print Acc
        for clf in classifiers:
            print(f"{scores[clf][1]:.4f} ($\\pm{stds[clf][1]:.2f}$)", end="&")
    
    # # Print rank
    # for clf in classifiers:
    #     print(f"{scores[clf][2]}", end="&")
    
    print(f"\b\\\\")

    all_scores.append(scores)
    all_stds.append(stds)
    # print(f"{dataset} & {scores['EDC'][0]:.4f} & {scores['random_forest'][0]:.4f} & {scores['EDC'][1]:.4f} & {scores['random_forest'][1]:.4f} \\\\")



# Get average ranks
average_ranks = {}
average_aucs = {}
average_accs = {}
for classifier in classifiers:
    average_ranks[classifier] = 0
    average_aucs[classifier] = 0
    average_accs[classifier] = 0
    for dataset in all_scores:
        if classifier not in dataset:
            continue
        average_ranks[classifier] += dataset[classifier][2]
        average_aucs[classifier] += dataset[classifier][0]
        average_accs[classifier] += dataset[classifier][1]
    average_ranks[classifier] /= len(all_scores)
    average_aucs[classifier] /= len(all_scores)
    average_accs[classifier] /= len(all_scores)

print("\\midrule")
print(f"Average Score&", end="")
for classifier in classifiers:
    print(f"{average_aucs[classifier]:.4f}", end="&")
print(f"\b\\\\")

print(f"Average Rank&", end="")
for classifier in classifiers:
    print(f"{average_ranks[classifier]:.2f}", end="&")
print(f"\b\\\\")

ADULT&0.8800 ($\pm0.01$)&0.8010 ($\pm0.00$)&0.7899 ($\pmnan$)&0.9018 ($\pm0.00$)&0.7308 ($\pm0.01$)&0.9018 ($\pm0.00$)&0.8799 ($\pm0.01$)&0.8983 ($\pm0.00$)\\
BANKNOTE&1.0000 ($\pm0.00$)&0.9925 ($\pm0.00$)&1.0000 ($\pm0.00$)&0.9997 ($\pm0.00$)&0.9840 ($\pm0.01$)&1.0000 ($\pm0.00$)&1.0000 ($\pm0.00$)&1.0000 ($\pm0.00$)\\
BREAST&0.5945 ($\pm0.18$)&0.5728 ($\pm0.08$)&0.6079 ($\pm0.10$)&0.6500 ($\pm0.12$)&0.5782 ($\pm0.07$)&0.7116 ($\pm0.12$)&0.6558 ($\pm0.07$)&0.7257 ($\pm0.12$)\\
CREDIT&0.9137 ($\pm0.02$)&0.8935 ($\pm0.05$)&0.8421 ($\pm0.04$)&0.9299 ($\pm0.03$)&0.8197 ($\pm0.04$)&0.9126 ($\pm0.03$)&0.9313 ($\pm0.03$)&0.9204 ($\pm0.03$)\\
CYLINDER&0.7309 ($\pm0.09$)&0.5998 ($\pm0.09$)&0.6387 ($\pm0.13$)&0.7628 ($\pm0.09$)&0.6516 ($\pm0.11$)&0.8201 ($\pm0.06$)&0.8659 ($\pm0.08$)&0.7737 ($\pm0.08$)\\
DIABETES&0.8280 ($\pm0.05$)&0.8055 ($\pm0.06$)&0.7181 ($\pm0.03$)&0.8321 ($\pm0.06$)&0.6614 ($\pm0.07$)&0.8360 ($\pm0.03$)&0.8246 ($\pm0.07$)&0.8452 ($\pm0.04$)\\
HEPATITIS&0.8359 ($\pm0.13$)&0