In [None]:
import sys
# Add the directory two levels above the working directory to the import path.
# NOTE(review): presumably this is what makes the `oab` package importable — confirm.
sys.path.append('../..')

# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell execution so edits to local source files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
from oab.data.load_dataset import load_dataset
from oab.evaluation import EvaluationObject, ComparisonObject

In [None]:
# load all algorithms
from cae_ocsvm import CAEOCSVM
from cae_iforest import CAEIForest
from conv_ae import ConvAutoEncoder

In [None]:
# Hyperparameter configuration for every algorithm in the comparison.

# Keyword arguments for the convolutional autoencoder. The very same dict object
# is reused by the stand-alone 'cae' entry and by both combined entries below.
CAE_parameters = dict(latent_dim=100, epochs=50, verbose=0)

# Keyword arguments forwarded as `OCSVM_parameters` / `IForest_parameters` below.
names_to_parameters = dict(
    ocsvm=dict(degree=3),
    iforest=dict(random_state=42),
)

# Algorithms to evaluate, in the order they are run.
algo_names = ['caeocsvm', 'caeiforest', 'cae']

# Algorithm name -> class used to build a fresh model.
name_to_init = dict(
    cae=ConvAutoEncoder,
    caeocsvm=CAEOCSVM,
    caeiforest=CAEIForest,
)

# Algorithm name -> keyword arguments passed to the matching class in `name_to_init`.
name_to_init_params = dict(
    cae=CAE_parameters,
    caeocsvm=dict(
        CAE_parameters=CAE_parameters,
        OCSVM_parameters=names_to_parameters['ocsvm'],
    ),
    caeiforest=dict(
        CAE_parameters=CAE_parameters,
        IForest_parameters=names_to_parameters['iforest'],
    ),
)

In [None]:
# Names of the data sets covered by this notebook: MNIST, CIFAR10 and five
# MVTec AD categories.
# NOTE(review): this list is not referenced by the cells visible here — confirm it is still needed.
dataset_names = ['mnist', 'cifar10'] + [
    f'mvtec_ad_{category}'
    for category in ('transistor', 'screw', 'pill', 'carpet', 'hazelnut')
]

In [None]:
# Sampling configuration handed to `sample_multiple_with_training_split` in the
# MNIST and CIFAR10 cells (keyword arguments of the same names).
# NOTE(review): presumably a train fraction, an upper bound on the anomaly share,
# and the number of sampled configurations — confirm against the oab documentation.
training_split, max_contamination_rate, n_steps = 0.7, 0.5, 10

In [None]:
# Collector for the evaluation results of every algorithm on every data set.
co = ComparisonObject()

# ---- MNIST ----
mnist = load_dataset('mnist', semisupervised=True)

# Arguments for the sampler; images are requested unflattened.
mnist_sampling = dict(
    training_split=training_split,
    max_contamination_rate=max_contamination_rate,
    n_steps=n_steps,
    flatten_images=False,
)

for algorithm_name in algo_names:
    print(f"---{algorithm_name}")  # progress: algorithm currently running
    init = name_to_init[algorithm_name]
    # Gathers the results of this one algorithm on this one data set.
    eval_obj = EvaluationObject(algorithm_name=algorithm_name)

    for (x_train, x_test, y_test), sample_config in mnist.sample_multiple_with_training_split(**mnist_sampling):
        print('.', end='')  # progress: one dot per sample
        # A fresh model is built and fitted for every sample.
        algo = init(**name_to_init_params[algorithm_name])
        algo.fit(x_train)
        pred = algo.decision_function(x_test)
        eval_obj.add(ground_truth=y_test, prediction=pred, description=sample_config)

    eval_desc = eval_obj.evaluate(
        print=False,
        metrics=['roc_auc', 'adjusted_average_precision', 'precision_recall_auc'],
    )
    # Store this algorithm's evaluation in the overall collector.
    co.add_evaluation(eval_desc)

# Drop the reference to the data set once all algorithms have been run on it.
del mnist

In [None]:
# ---- CIFAR10 ----
cifar10 = load_dataset('cifar10', semisupervised=True)

# Arguments for the sampler; images are requested unflattened.
cifar10_sampling = dict(
    training_split=training_split,
    max_contamination_rate=max_contamination_rate,
    n_steps=n_steps,
    flatten_images=False,
)

for algorithm_name in algo_names:
    print(f"---{algorithm_name}")  # progress: algorithm currently running
    init = name_to_init[algorithm_name]
    # Gathers the results of this one algorithm on this one data set.
    eval_obj = EvaluationObject(algorithm_name=algorithm_name)

    for (x_train, x_test, y_test), sample_config in cifar10.sample_multiple_with_training_split(**cifar10_sampling):
        print('.', end='')  # progress: one dot per sample
        # A fresh model is built and fitted for every sample.
        algo = init(**name_to_init_params[algorithm_name])
        algo.fit(x_train)
        pred = algo.decision_function(x_test)
        eval_obj.add(ground_truth=y_test, prediction=pred, description=sample_config)

    eval_desc = eval_obj.evaluate(
        print=False,
        metrics=['roc_auc', 'adjusted_average_precision', 'precision_recall_auc'],
    )
    # Store this algorithm's evaluation in the overall collector (`co`).
    co.add_evaluation(eval_desc)

# Drop the reference to the data set once all algorithms have been run on it.
del cifar10

In [None]:
# MVTec AD data sets already have a train/test split, and that original split is
# used here (one sample per data set) instead of drawing several samples.
for mvtec_dataset_name in ['mvtec_ad_transistor', 'mvtec_ad_screw', 'mvtec_ad_pill', 'mvtec_ad_carpet', 'mvtec_ad_hazelnut']:
    dataset = load_dataset(mvtec_dataset_name, semisupervised=True)
    print(f"{mvtec_dataset_name}")  # progress: data set currently running
    for algorithm_name in algo_names:
        print(f"---{algorithm_name}")  # progress: algorithm currently running
        init = name_to_init[algorithm_name]
        # Gathers the results of this one algorithm on this one data set.
        eval_obj = EvaluationObject(algorithm_name=algorithm_name)
        # Images are requested unflattened, as in the MNIST and CIFAR10 cells.
        (x_train, x_test, y_test), sample_config = dataset.sample_original_mvtec_split(flatten_images=False)
        algo = init(**name_to_init_params[algorithm_name])
        algo.fit(x_train)
        pred = algo.decision_function(x_test)
        eval_obj.add(ground_truth=y_test, prediction=pred, description=sample_config)

        eval_desc = eval_obj.evaluate(print=False, metrics=['roc_auc', 'adjusted_average_precision', 'precision_recall_auc'])
        # Store this algorithm's evaluation in the overall collector (`co`).
        co.add_evaluation(eval_desc)

    # Drop the reference to this data set before the next one is loaded, matching
    # the `del mnist` / `del cifar10` handling of the other data sets.
    del dataset

In [None]:
# Print everything collected in `co` by the evaluation cells above in human-readable form.
co.print_results() # print results in human-readable form

In [None]:
# Print the same collected results from `co` as a LaTeX table.
co.print_latex() # print results as latex table