In [None]:
import sys
# Add the directory two levels above the notebook to the module search path
# (presumably the repository root that contains the local `oab` package — confirm).
sys.path.append('../..')

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from tqdm import tqdm

# import function to load datasets
from oab.data.load_dataset import load_dataset
# import objects for evaluation
from oab.evaluation import EvaluationObject, ComparisonObject

# import anomaly detection algorithms from pyod
from pyod.models.ocsvm import OCSVM # fit and decision_function
from pyod.models.iforest import IForest
from pyod.models.pca import PCA
from pyod.models.auto_encoder import AutoEncoder
from pyod.models.vae import VAE

In [None]:
# Names of the benchmark datasets, in the order they are loaded and evaluated.
dataset_names = [
    'boston',
    'page-blocks',
    'pulsar_star',
    'forest_cover',
    'spambase',
    'wilt',
    'NASA_ground_data',
]

# Load every dataset with semisupervised=True and collect them in a list.
datasets = [load_dataset(dataset_name, semisupervised=True) for dataset_name in dataset_names]

# Keep the individual datasets reachable under their own names as well.
boston, pageblocks, pulsar_star, forest_cover, spambase, wilt, nasa = datasets

In [None]:
# Maps each short algorithm name to the pyod detector class it stands for.
names_to_algorithms = {
    'ocsvm': OCSVM,
    'iforest': IForest,
    'ae': AutoEncoder,
    'pca': PCA,
    'vae': VAE,
}

# Keyword arguments used to instantiate each detector, keyed by the same short names.
names_to_parameters = {
    # default parameter
    'ocsvm': {'degree': 3},
    'iforest': {'random_state': 42},
    'pca': {'n_components': 0.9, 'svd_solver': 'full'},
    'ae': {
        'verbose': 0,
        'hidden_neurons': [6, 3, 3, 6],
        'random_state': 42,
    },
    'vae': {
        'encoder_neurons': [6, 3],
        'decoder_neurons': [3, 6],
        'verbose': 0,
        'random_state': 42,
    },
}

# Algorithms to benchmark, in execution order.
algorithm_names = ['ocsvm', 'iforest', 'pca', 'ae', 'vae']

In [None]:
# Sampling configuration; all three values are forwarded unchanged to
# dataset.sample_multiple_with_training_split(...) in the benchmark loop.

# presumably the fraction of data reserved for training — confirm against oab
training_split = 0.7
# presumably the upper bound on the anomaly share in a sample — confirm against oab
max_contamination_rate = 0.5
# presumably the number of samples drawn per dataset — confirm against oab
n_steps = 10

In [None]:
import tensorflow as tf
import numpy as np

# Benchmark loop: for every dataset and every algorithm, draw several samples,
# fit a fresh detector on each training split, score the test split, and
# aggregate the resulting metrics in a single ComparisonObject.

# create comparison object that holds all evaluations
co = ComparisonObject()


# run algorithms on datasets
for dataset in tqdm(datasets):

    for algorithm_name in algorithm_names:
        print(f"-- Dataset name {dataset.classification_dataset.name}, algorithm {algorithm_name}")
        algorithm = names_to_algorithms[algorithm_name]
        param_dict = names_to_parameters[algorithm_name]
        # eval_obj stores predictions and ground truths
        eval_obj = EvaluationObject(algorithm_name=algorithm_name)

        # sample multiple times from each dataset
        samples = dataset.sample_multiple_with_training_split(
            training_split=training_split,
            max_contamination_rate=max_contamination_rate,
            n_steps=n_steps,
        )
        for (x_train, x_test, y_test), sample_config in samples:
            if algorithm_name in ('ae', 'vae'):
                # A new Keras model is built on every iteration; without
                # clearing the session the global Keras state (and memory)
                # keeps growing over the whole benchmark run.
                tf.keras.backend.clear_session()
                # Seed the global TensorFlow and NumPy generators with the
                # same seed the detector is configured with (falls back to 42),
                # so the two values cannot drift apart.
                # NOTE(review): np.random.seed resets the process-wide NumPy RNG
                # while the sampling generator above is still being iterated; if
                # the sampler draws from the global RNG this influences the
                # following samples — confirm against the oab implementation.
                seed = param_dict.get('random_state', 42)
                tf.random.set_seed(seed)
                np.random.seed(seed)
            # instantiate anomaly detection algorithm
            algo = algorithm(**param_dict)
            # fit data to algorithm
            algo.fit(x_train)
            # get prediction scores
            pred = algo.decision_function(x_test)
            # add ground truth and prediction to evaluation object
            eval_obj.add(ground_truth=y_test, prediction=pred, description=sample_config)
        # calculate mean values for metrics based on previously added ground truths
        # and predictions
        eval_desc = eval_obj.evaluate(
            print=False,
            metrics=['roc_auc', 'adjusted_average_precision', 'precision_recall_auc'],
        )
        # add resulting evaluation to the comparison object
        co.add_evaluation(eval_desc)

In [None]:
# print the evaluations collected in `co` in an easily readable format
co.print_results()

In [None]:
# print the evaluations collected in `co` in an easily readable format,
# this time with standard deviations (include_stdevs=True)
co.print_results(include_stdevs=True)

In [None]:
# print the evaluations collected in `co` in LaTeX format
# (note: print_latex also has the parameter include_stdevs)
co.print_latex()