In [1]:
import sys
sys.path.append('../..')

%load_ext autoreload
%autoreload 2

In [2]:
# import function to load datasets
from oab.data.load_dataset import load_dataset
# import objects for evaluation
from oab.evaluation import EvaluationObject, ComparisonObject

# import anomaly detection algorithms from pyod
from pyod.models.knn import KNN # fit and decision_scores_
from pyod.models.lof import LOF
from pyod.models.iforest import IForest

In [3]:
sampling_size = 100
n_steps = 5
contamination_rate = 0.05

In [4]:
# create comparison object that holds all evaluations
co = ComparisonObject()

# load datasets and store them in a list
wilt = load_dataset('wilt')
forest_cover = load_dataset('forest_cover')
nasa = load_dataset('NASA_ground_data')
datasets = [wilt, forest_cover, nasa]

# specify which algorithms to use and what their name is
algorithms = [KNN, LOF, IForest]
algorithm_names = ["kNN (5)", "LOF (5)", "IForest"]

# run algorithms on datasets
for dataset in datasets:
    for algorithm, algorithm_name in zip(algorithms, algorithm_names):
        # eval_obj stores predictions and ground truths
        eval_obj = EvaluationObject(algorithm_name=algorithm_name)

        # sample multiple times from each dataset
        for (x, y), sample_config in dataset.sample_multiple(n=sampling_size, n_steps=n_steps, 
                                                             contamination_rate=contamination_rate):
            # instantiate anomaly detection algorithm
            algo = algorithm()
            # fit data to algorithm
            algo.fit(x)
            # get prediction scores
            pred = algo.decision_scores_
            # add ground truth and prediction to evaluation object
            eval_obj.add(ground_truth=y, prediction=pred, description=sample_config)
        # calculate mean values for metrics based on previously added ground truths
        # and predictions
        eval_desc = eval_obj.evaluate(print=False)
        # add resulting evaluation to the comparison object
        co.add_evaluation(eval_desc)

Credits: Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
Credits: Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
Credits: Sayyad Shirabad, J. and Menzies, T.J. (2005) The PROMISE Repository of Software Engineering Databases. School of Information Technology and Engineering, University of Ottawa, Canada.


In [5]:
# print results in easily readable format
co.print_results()

For roc_auc:
             wilt  forest_cover  NASA_ground_data   Average
kNN (5)  0.535368      0.921263          0.714526  0.723719
LOF (5)  0.443368      0.930105          0.651368  0.674947
IForest  0.445895      0.958737          0.725895  0.710175
Average  0.474877      0.936702          0.697263       NaN
For average_precision:
             wilt  forest_cover  NASA_ground_data   Average
kNN (5)  0.065572      0.350800          0.170956  0.195776
LOF (5)  0.054281      0.365948          0.110461  0.176897
IForest  0.053718      0.563469          0.178292  0.265160
Average  0.057857      0.426739          0.153236       NaN
For adjusted_average_precision:
             wilt  forest_cover  NASA_ground_data   Average
kNN (5)  0.016392      0.316632          0.127322  0.153448
LOF (5)  0.004507      0.332577          0.063643  0.133576
IForest  0.003914      0.540494          0.135044  0.226484
Average  0.008271      0.396568          0.108669       NaN


In [6]:
# print results in easily readable format with standard deviations
co.print_results(include_stdevs=True)

For roc_auc:
                 wilt  forest_cover NASA_ground_data   Average
kNN (5)  0.535+-0.122  0.921+-0.035     0.715+-0.145  0.723719
LOF (5)  0.443+-0.083  0.930+-0.043     0.651+-0.152  0.674947
IForest  0.446+-0.069  0.959+-0.016     0.726+-0.124  0.710175
Average         0.475         0.937            0.697       NaN

For average_precision:
                 wilt  forest_cover NASA_ground_data   Average
kNN (5)  0.066+-0.016  0.351+-0.130     0.171+-0.067  0.195776
LOF (5)  0.054+-0.013  0.366+-0.134     0.110+-0.036  0.176897
IForest  0.054+-0.008  0.563+-0.052     0.178+-0.080  0.265160
Average         0.058         0.427            0.153       NaN

For adjusted_average_precision:
                 wilt  forest_cover NASA_ground_data   Average
kNN (5)  0.016+-0.017  0.317+-0.137     0.127+-0.071  0.153448
LOF (5)  0.005+-0.013  0.333+-0.141     0.064+-0.038  0.133576
IForest  0.004+-0.009  0.540+-0.055     0.135+-0.084  0.226484
Average         0.008         0.397            0

In [7]:
# print results in latex format (note: also has parameter include_stdevs)
co.print_latex()

For roc_auc:
\begin{center}
\begin{tabular}{  c c c c c  }
  & wilt & forest\_cover & NASA\_ground\_data & Average \\
  kNN (5) & \textbf{0.535} & 0.921 & \textit{0.715} & \textbf{0.724} \\
  LOF (5) & 0.443 & \textit{0.930} & 0.651 & 0.675 \\
  IForest & \textit{0.446} & \textbf{0.959} & \textbf{0.726} & \textit{0.710} \\
  Average & 0.475 & 0.937 & 0.697 &    \\
\end{tabular}
\end{center}

For average_precision:
\begin{center}
\begin{tabular}{  c c c c c  }
  & wilt & forest\_cover & NASA\_ground\_data & Average \\
  kNN (5) & \textbf{0.066} & 0.351 & \textit{0.171} & \textit{0.196} \\
  LOF (5) & \textit{0.054} & \textit{0.366} & 0.110 & 0.177 \\
  IForest & 0.054 & \textbf{0.563} & \textbf{0.178} & \textbf{0.265} \\
  Average & 0.058 & 0.427 & 0.153 &    \\
\end{tabular}
\end{center}

For adjusted_average_precision:
\begin{center}
\begin{tabular}{  c c c c c  }
  & wilt & forest\_cover & NASA\_ground\_data & Average \\
  kNN (5) & \textbf{0.016} & 0.317 & \textit{0.127} & \textit{0

In [8]:
# see what the sampling configs are
co.print_sampling_configs()

                      wilt  forest_cover  NASA_ground_data
n                   100.00        100.00            100.00
contamination_rate    0.05          0.05              0.05
sampling_steps        5.00          5.00              5.00


In [9]:
# if we include another algorithm that samples with a different config, it will notice:
from pyod.models.abod import ABOD

# run algorithms on datasets
for dataset in datasets:
    algorithm, algorithm_name = ABOD, "ABOD"
    # eval_obj stores predictions and ground truths
    eval_obj = EvaluationObject(algorithm_name=algorithm_name)

    # sample multiple times from each dataset
    for (x, y), sample_config in dataset.sample_multiple(n=40, n_steps=5, 
                                                             contamination_rate=0.1):
        # instantiate anomaly detection algorithm
        algo = algorithm()
        # fit data to algorithm
        algo.fit(x)
        # get prediction scores
        pred = algo.decision_scores_
        # add ground truth and prediction to evaluation object
        eval_obj.add(ground_truth=y, prediction=pred, description=sample_config)
    # calculate mean values for metrics based on previously added ground truths
    # and predictions
    eval_desc = eval_obj.evaluate(print=False)
    # add resulting evaluation to the comparison object
    co.add_evaluation(eval_desc)

In [10]:
co.print_sampling_configs()

Exception: Evaluation objects are not matching, i.e., not all results from the same dataset are sampled in the same way. More specifically, for dataset wilt, expected value for n was 100.0 but got 40 on algorithm ABOD.