In [None]:
import sys
sys.path.append('../..')

%load_ext autoreload
%autoreload 2

In [None]:
from oab.data.load_dataset import load_dataset
from oab.evaluation import EvaluationObject, ComparisonObject

In [None]:
# load all algorithms
from conv_ae import ConvAutoEncoder
from cae_ABOD import CAEABOD
from cae_KNN import CAEKNN
from cae_LOF import CAELOF
from cae_iforest import CAEIForest

In [None]:
# define hyperparameters for all algorithms

# settings handed to the convolutional autoencoder by every initialiser below
CAE_parameters = dict(latent_dim=100, epochs=50, verbose=0)

def conv_ae_init(*args, **kwargs):
    """Create a new ``ConvAutoEncoder`` configured with ``CAE_parameters``.

    Any positional or keyword arguments are accepted and ignored, so this
    factory can be invoked exactly like the other initialisers
    (e.g. ``conv_ae_init(n=len(x))``).
    """
    cae_settings = dict(CAE_parameters)
    return ConvAutoEncoder(**cae_settings)

# CAE+KNN
knn_factor = 0.05  # neighbourhood size as a fraction of the sample size
knn_minimum = 10   # lower bound on the neighbourhood size
def cae_KNN_init(n: int, *args, **kwargs):
    """Build a ``CAEKNN`` detector whose neighbourhood size scales with ``n``.

    ``n_neighbors`` is ``max(knn_factor * n, knn_minimum)`` truncated to an
    integer and then capped at ``n - 1`` (but never below 1), because a
    sample cannot have more neighbours than there are other samples.

    Args:
        n: number of samples the detector is going to be fitted on.
        *args, **kwargs: accepted and ignored so that all initialisers can be
            called the same way.

    Returns:
        A new ``CAEKNN`` instance that uses the module-level ``CAE_parameters``.
    """
    k = int(max(knn_factor * n, knn_minimum))
    # For very small samples (n <= knn_minimum) the lower bound alone would
    # ask for more neighbours than exist; cap it at the number of other samples.
    k = max(1, min(k, n - 1))
    KNN_parameters = {'n_neighbors': k}
    return CAEKNN(CAE_parameters=CAE_parameters, KNN_parameters=KNN_parameters)


# CAE+LOF
lof_factor = 0.1   # neighbourhood size as a fraction of the sample size
lof_minimum = 10   # lower bound on the neighbourhood size
def cae_LOF_init(n: int, *args, **kwargs):
    """Build a ``CAELOF`` detector whose neighbourhood size scales with ``n``.

    ``n_neighbors`` is ``max(lof_factor * n, lof_minimum)`` truncated to an
    integer and then capped at ``n - 1`` (but never below 1), because a
    sample cannot have more neighbours than there are other samples.

    Args:
        n: number of samples the detector is going to be fitted on.
        *args, **kwargs: accepted and ignored so that all initialisers can be
            called the same way.

    Returns:
        A new ``CAELOF`` instance that uses the module-level ``CAE_parameters``.
    """
    k = int(max(lof_factor * n, lof_minimum))
    # For very small samples (n <= lof_minimum) the lower bound alone would
    # ask for more neighbours than exist; cap it at the number of other samples.
    k = max(1, min(k, n - 1))
    LOF_parameters = {'n_neighbors': k}
    return CAELOF(CAE_parameters=CAE_parameters, LOF_parameters=LOF_parameters)

# CAE+ABOD
abod_factor = 0.01  # neighbourhood size as a fraction of the sample size
abod_minimum = 10   # lower bound on the neighbourhood size
def cae_ABOD_init(n: int, *args, **kwargs):
    """Build a ``CAEABOD`` detector whose neighbourhood size scales with ``n``.

    ``n_neighbors`` is ``max(abod_factor * n, abod_minimum)`` truncated to an
    integer and then capped at ``n - 1`` (but never below 1), because a
    sample cannot have more neighbours than there are other samples.

    Args:
        n: number of samples the detector is going to be fitted on.
        *args, **kwargs: accepted and ignored so that all initialisers can be
            called the same way.

    Returns:
        A new ``CAEABOD`` instance that uses the module-level ``CAE_parameters``.
    """
    k = int(max(abod_factor * n, abod_minimum))
    # For very small samples (n <= abod_minimum) the lower bound alone would
    # ask for more neighbours than exist; cap it at the number of other samples.
    k = max(1, min(k, n - 1))
    ABOD_parameters = {'n_neighbors': k}
    return CAEABOD(CAE_parameters=CAE_parameters, ABOD_parameters=ABOD_parameters)

def cae_IForest_init(*args, **kwargs):
    """Create a new ``CAEIForest`` with a fixed isolation-forest ``random_state``.

    Any positional or keyword arguments are accepted and ignored, so this
    factory can be invoked exactly like the other initialisers
    (e.g. ``cae_IForest_init(n=len(x))``).
    """
    return CAEIForest(
        CAE_parameters=CAE_parameters,
        IForest_parameters=dict(random_state=42),
    )

# algorithms to evaluate, in the order in which they are run
algo_names = ['caeknn', 'caelof', 'caeabod', 'caeiforest', 'cae']

# maps every algorithm name to the factory that creates a new instance of it
name_to_init = dict(
    cae=conv_ae_init,
    caeabod=cae_ABOD_init,
    caelof=cae_LOF_init,
    caeknn=cae_KNN_init,
    caeiforest=cae_IForest_init,
)

In [None]:
# define data sets to load (by name)
dataset_names = [
    'mnist',
    'cifar10',
    'mvtec_ad_transistor',
    'mvtec_ad_screw',
    'mvtec_ad_pill',
    'mvtec_ad_carpet',
    'mvtec_ad_hazelnut',
]

In [None]:
co = ComparisonObject()  # object that collects all results

n_steps = 10  # number of sampling steps

# NOTE(review): no random seed is set in this cell; if the models are
# stochastic the reported numbers may differ between runs - confirm.

# iterate through all data sets
for dataset_name in dataset_names:
    # Drop the reference to the previously loaded data set before loading the
    # next one, so that this name never keeps two data sets alive at once.
    dataset = None
    dataset = load_dataset(dataset_name)  # per default makes unsupervised anomaly dataset
    print(f"{dataset.classification_dataset.name}")  # update to see progress

    # iterate through all algorithms
    for algorithm_name in algo_names:
        print(f"---{algorithm_name}")  # update to see progress
        init = name_to_init[algorithm_name]
        # object that collects results for one algorithm on one data set
        eval_obj = EvaluationObject(algorithm_name=algorithm_name)
        # sample n_steps times with sampling parameters as described in paper
        for (x, y), sample_config in dataset.sample_multiple(
            **dataset.get_sampling_parameters(),
            n_steps=n_steps,
            flatten_images=False,
        ):
            # flush so that the dot shows up immediately, not only at the next newline
            print('.', end='', flush=True)  # update to see progress
            algo = init(n=len(x))  # new model instance for every sample
            algo.fit(x)
            # NOTE(review): presumably the anomaly scores of the fitted samples - confirm
            pred = algo.decision_scores_
            eval_obj.add(ground_truth=y, prediction=pred, description=sample_config)
        # terminate the line of progress dots so the next header starts on its own line
        print()
        eval_desc = eval_obj.evaluate(
            print=False,
            metrics=['roc_auc', 'adjusted_average_precision', 'precision_recall_auc'],
        )
        # append evaluation of one algorithm on one data set to object that collects all results
        co.add_evaluation(eval_desc)

In [None]:
# Show the evaluations collected in `co` in human-readable form.
co.print_results()

In [None]:
# Show the evaluations collected in `co` as a LaTeX table.
co.print_latex()