In [None]:
from tuning.bo.optimizables import optimizable_models
from analysis_utils import highlight_max


## Optimization

In [None]:
from tuning.bo.core.tune import tune
import numpy as np
from collections import defaultdict
import time
import os
import torch.multiprocessing as mp
import itertools
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
# Model configurations to tune. The order of this list determines the order in
# which tasks are submitted and the column order of the results table.
optimizables = [
    # Ensemble deep RVFL baseline.
    optimizable_models.EdRVFL(
        name="edRVFL",
        input_direct_link=True,
        n_layers=20,
        prune_percentage=None,
    ),
    # Dropout MLPs trained with MSE loss.
    optimizable_models.MLP(name="MLP", loss="mse", use_drop_out=True),
    optimizable_models.MOMLP(name="MO-MLP", loss="mse", use_drop_out=True),
    # MO-MLP variants.
    # NOTE(review): the two "Seq" entries are built with exactly the same
    # arguments as "MO-MLP" above; only the name differs here. Confirm that
    # this is intended.
    optimizable_models.MOMLP(name="MO-MLP (CE)", loss="ce", use_drop_out=True),
    optimizable_models.MOMLP(
        name="MO-MLP (Seq, Scl)", loss="mse", use_drop_out=True
    ),
    optimizable_models.MOMLP(
        name="MO-MLP (Seq, Vec)", loss="mse", use_drop_out=True
    ),
    # SELU-based networks.
    optimizable_models.MLP(name="SNN", loss="mse", use_selu=True),
    optimizable_models.MOMLP(name="MO-SNN", loss="mse", use_selu=True),
    # edAS with different sampling methods.
    optimizable_models.SS(
        name="edAS", input_direct_link=True, sampling_method="exp_loss"
    ),
    optimizable_models.SS(
        name="edAS (Loss)", input_direct_link=True, sampling_method="loss"
    ),
    optimizable_models.SS(
        name="edAS (Acc.)", input_direct_link=True, sampling_method="acc"
    ),
    optimizable_models.SS(
        name="edAS (CE)",
        input_direct_link=True,
        sampling_method="exp_loss",
        loss="ce",
    ),
    # No sampling_method is passed here, so the class default applies.
    optimizable_models.SS(name="edAS (Exp. Acc.)", input_direct_link=True),
]
# Names of the datasets every model is tuned and evaluated on. The order is
# kept as-is because it drives task submission and the results index.
datasets = [
    "abalone",
    "plant-shape",
    "arrhythmia",
    "bank",
    "breast-cancer-wisc-prog",
    "breast-tissue",
    "cardiotocography-10clases",
    "cardiotocography-3clases",
    "chess-krvkp",
    "congressional-voting",
    "conn-bench-vowel-deterding",
    "contrac",
    "hill-valley",
    "ionosphere",
    "iris",
    "musk-2",
    "oocytes_merluccius_nucleus_4d",
    "oocytes_merluccius_states_2f",
    "oocytes_trisopterus_states_5b",
    "parkinsons",
    "ringnorm",
    "seeds",
    "spambase",
    "statlog-image",
    "statlog-landsat",
    "synthetic-control",
    "twonorm",
    "vertebral-column-2clases",
    "vertebral-column-3clases",
    "wall-following",
    "waveform",
    "waveform-noise",
    "wine-quality-white",
]

In [None]:
# --- Experiment configuration ---

# Identifier handed to `tune` for entries that do not carry their own id.
save_id = "PR0"

# Size of the worker pool and number of trials handed to `tune`.
n_workers = 5
n_trials = 200

# Remaining settings forwarded to `tune`; `evaluation_reps` also sizes the
# results table built later in the notebook.
seed = 42
evaluation_reps = 10
force_tune = False

# Select the "spawn" start method; force=True allows re-running this cell
# after a start method has already been set.
mp.set_start_method("spawn", force=True)

In [None]:
# Submit one `tune` task per (dataset, model) pair to a worker pool, then poll
# until every task has finished and collect the results into `metrics`.
#
# Entries of `optimizables` may be either an optimizable object or a dict with
# the keys "optimizable" and "id"; plain objects use the global `save_id`.
pool = mp.Pool(processes=n_workers)
responses = defaultdict(lambda: defaultdict(dict))
metrics = defaultdict(lambda: defaultdict(dict))
done = np.zeros((len(datasets), len(optimizables)), dtype=bool)
ctr = 0
pbar = tqdm(total=len(datasets) * len(optimizables))

try:
    for dataset, optimizable_dict in itertools.product(datasets, optimizables):
        if isinstance(optimizable_dict, dict):
            optimizable = optimizable_dict["optimizable"]
            run_id = optimizable_dict["id"]
        else:
            optimizable = optimizable_dict
            run_id = save_id
        # Arguments are positional; the meaning of the `None` and `False`
        # slots is defined by `tune`'s signature, which is not visible here.
        responses[dataset][optimizable.name] = pool.apply_async(
            tune,
            args=(
                optimizable,
                dataset,
                n_trials,
                run_id,
                None,
                force_tune,
                seed,
                evaluation_reps,
                False,
            ),
        )
    # No further tasks will be submitted; lets the workers exit once idle.
    pool.close()

    while not np.all(done):
        for i, dataset in enumerate(datasets):
            for j, optimizable_dict in enumerate(optimizables):
                if done[i][j]:
                    continue
                if isinstance(optimizable_dict, dict):
                    optimizable = optimizable_dict["optimizable"]
                else:
                    optimizable = optimizable_dict
                response = responses[dataset][optimizable.name]
                if not response.ready():
                    continue
                # `.get()` re-raises any exception raised inside the worker.
                metrics[dataset][optimizable.name] = response.get()
                # Column 8 is "test_acc" in the metric ordering used when the
                # results table is filled further down in the notebook.
                print("received: ", i, j, dataset, optimizable.name, np.array(metrics[dataset][optimizable.name]["metrics"]).mean(0)[8])
                done[i][j] = True
                ctr += 1
                pbar.update(1)
        # Sleep once per full sweep rather than once per dataset.
        time.sleep(0.1)
except BaseException:
    # On failure or interrupt, stop outstanding work so no workers are leaked.
    pool.terminate()
    raise
finally:
    pbar.close()
    # The pool is closed or terminated on every path reaching here, which
    # `join` requires.
    pool.join()

In [None]:
# Collect the per-repetition results of every (dataset, model) pair into one
# DataFrame indexed by (rep, dataset) with (model, metric) columns, then show
# mean test accuracy per dataset and the mean rank of each model.
pdidx = pd.IndexSlice
# Each tune result is expected to hold 4 * evaluation_reps rows; the factor 4
# comes from `tune` and is not visible here -- TODO confirm.
reps = 4 * evaluation_reps

# Score columns, in the order they appear in each result's "metrics" array.
score_names = [
    "train_acc", "train_f1", "train_precision", "train_recall",
    "val_acc", "val_f1", "val_precision", "val_recall",
    "test_acc", "test_f1", "test_precision", "test_recall",
]
# Accept both plain optimizables and {"optimizable": ..., "id": ...} dicts,
# matching how the tuning cell unpacks `optimizables`.
model_names = [
    (entry["optimizable"] if isinstance(entry, dict) else entry).name
    for entry in optimizables
]

metrics_df = pd.DataFrame(
    index=pd.MultiIndex.from_product(
        [np.arange(reps), datasets], names=["rep", "dataset"]
    ),
    columns=pd.MultiIndex.from_product(
        [model_names, score_names + ["time"]], names=["model", "metric"]
    ),
    dtype=float,
)

for dataset, model_name in itertools.product(datasets, model_names):
    try:
        result = metrics[dataset][model_name]
        metrics_df.loc[pdidx[:reps, dataset], pdidx[model_name, score_names]] = result["metrics"]
        metrics_df.loc[pdidx[:reps, dataset], pdidx[model_name, "time"]] = result["train_time"]
    except Exception:
        # Report which dataset failed, then re-raise with the original traceback.
        print(dataset)
        raise

# Slice by `reps` (not a hard-coded 40) so the summary always covers every
# repetition, whatever `evaluation_reps` is set to.
tmp = metrics_df.loc[:reps, pdidx[:, "test_acc"]].unstack(1).droplevel([1], 1).mean(0).unstack(0)
display((tmp * 100).round(2).style.apply(highlight_max, axis=1))
tmp.rank(axis=1, ascending=False).mean(0).sort_values()

## Save Experiment Results

In [None]:
# Write the collected metrics to ./results/<experiment_name>.csv, refusing to
# overwrite the results of an earlier experiment with the same name.
experiment_name = "eval_stats"
file_name = f"./results/{experiment_name}.csv"
if os.path.exists(file_name):
    # FileExistsError is an Exception subclass, so existing handlers still work.
    raise FileExistsError("There already exist an experiment with this name!")
# Create the output directory on first use so `to_csv` does not fail.
os.makedirs(os.path.dirname(file_name), exist_ok=True)
metrics_df.to_csv(file_name)
print(f"Saved as: {file_name}")