## Import python packages

In [1]:
import os
import sys
import numpy as np
import pandas as pd

from time import time
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

## Import pyod packages and the methods

In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging



## Import Performance Metrics  packages

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

## Define data file and read X and Y

In [4]:
mat_file_list = ['arrhythmia.mat',
                'cardio.mat',
                'glass.mat',
                'ionosphere.mat',
                'letter.mat',
                'lympho.mat',
                'mnist.mat',
                'musk.mat',
                'optdigits.mat',
                'pendigits.mat',
                'pima.mat',
                'satellite.mat',
                'satimage-2.mat',
                'shuttle.mat',
                'vertebral.mat',
                'vowels.mat',
                'wbc.mat']


### Loading the mat file and display its data

In [5]:
data = loadmat('data/cardio.mat')
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [6]:
len(data)

5

In [7]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [8]:
data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

## Input (Independent) Feature Shape in mat file format

In [9]:
type(data['X']), data['X'].shape

(numpy.ndarray, (1831, 21))

## Dependent / Target / Output Feature Shape

In [10]:
type(data['y']), data['y'].shape

(numpy.ndarray, (1831, 1))

## Define outlier detection tools  to be compared

In [11]:
df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc', 'ABOD', 'CBLOF', 
              'FB', 'HBOS', 'IForest', 'KNN', 'LOF', 'MCD', 'OCSVM', 'PCA'] 

## Roc  Performance evolution Table

In [12]:
# region of characteristics
roc_df = pd.DataFrame(columns=df_columns)

roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


## precision_n_scores - Performance evolution table 


In [13]:
prn_df = pd.DataFrame(columns=df_columns)

prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


## Time dataframe

In [14]:
time_df = pd.DataFrame(columns=df_columns)
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


# Explorinig all mat files

In [15]:

# Creating random state
random_state = np.random.RandomState(42)

# Processing mat files one by one : 
for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()  #ravel() function converts 2D to 1D
    
    # Counting Outlier :
    
    # Counts the number of non-zero values in the array y and divide by length of y : It gives outlier in fraction
    outliers_fraction = np.count_nonzero(y) / len(y)
    
    # Calculating Outlier percentage
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # Construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

    # Spliting Data into : 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=random_state)
    
    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    # Applying all the algorithms and storing thier result in a dictionary format:
    classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(contamination=outliers_fraction),
                   
                   'Cluster-based Local Outlier Factor': CBLOF(contamination=outliers_fraction, check_estimator=False,
                                                               random_state=random_state),
                   
                   'Feature Bagging': FeatureBagging(contamination=outliers_fraction, random_state=random_state),
                   
                   'Histogram-base Outlier Detection (HBOS)': HBOS(contamination=outliers_fraction),
                   
                   'Isolation Forest': IForest(contamination=outliers_fraction, random_state=random_state),
                   
                   'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
                   
                   'Local Outlier Factor (LOF)': LOF(contamination=outliers_fraction),
                   
                   'Minimum Covariance Determinant (MCD)': MCD(contamination=outliers_fraction, random_state=random_state),
                   
                   'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
                   
                   'Principal Component Analysis (PCA)': PCA(contamination=outliers_fraction, random_state=random_state),
   }


    # Calculating Time taken for each algorithm
    for clf_name, clf in classifiers.items():
        # Initialize the start time 
        t0 = time() 
        
        # Fit( Train )the data
        clf.fit(X_train_norm) 
        
        # Predicting Value on Xtest
        test_scores = clf.decision_function(X_test_norm)  
        
        # Final Time
        t1 = time()   
        
        # Total time duration : t1 - t0
        duration = round(t1 - t0, ndigits=4) 
        
        # Append duration in time list
        time_list.append(duration)

        #Calculating roc and precision value of the algorithm
        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        # Print the roc , precision and executing time 
        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        # Append roc and precision value to their respective list
        roc_list.append(roc)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)
    
    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 1.0545s




Cluster-based Local Outlier Factor ROC:0.7684, precision @ rank n:0.4643, execution time: 1.0546s
Feature Bagging ROC:0.7799, precision @ rank n:0.5, execution time: 0.5049s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 1.4386s
Isolation Forest ROC:0.8478, precision @ rank n:0.5357, execution time: 0.3801s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0772s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0665s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 0.4535s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0415s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0302s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5763, precision @ rank n:0.1875, execution time: 0.3329s
Cluster-based Local Outlier Factor ROC:0.8221, precision @ rank n:0.4844, execution time: 0.1137s




Feature Bagging ROC:0.4879, precision @ rank n:0.1406, execution time: 0.6269s
Histogram-base Outlier Detection (HBOS) ROC:0.8453, precision @ rank n:0.4688, execution time: 0.0057s
Isolation Forest ROC:0.9316, precision @ rank n:0.4531, execution time: 0.3772s
K Nearest Neighbors (KNN) ROC:0.6959, precision @ rank n:0.2812, execution time: 0.1182s
Local Outlier Factor (LOF) ROC:0.4715, precision @ rank n:0.125, execution time: 0.0795s




Minimum Covariance Determinant (MCD) ROC:0.8778, precision @ rank n:0.3906, execution time: 0.4565s
One-class SVM (OCSVM) ROC:0.9507, precision @ rank n:0.5938, execution time: 0.0591s
Principal Component Analysis (PCA) ROC:0.9638, precision @ rank n:0.6875, execution time: 0.0045s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7104, precision @ rank n:0.25, execution time: 0.0328s
Cluster-based Local Outlier Factor ROC:0.8506, precision @ rank n:0.25, execution time: 0.0405s
Feature Bagging ROC:0.7043, precision @ rank n:0.25, execution time: 0.0332s
Histogram-base Outlier Detection (HBOS) ROC:0.6524, precision @ rank n:0.0, execution time: 0.0025s




Isolation Forest ROC:0.7195, precision @ rank n:0.25, execution time: 0.3004s
K Nearest Neighbors (KNN) ROC:0.7805, precision @ rank n:0.25, execution time: 0.0078s
Local Outlier Factor (LOF) ROC:0.7774, precision @ rank n:0.25, execution time: 0.0024s
Minimum Covariance Determinant (MCD) ROC:0.7165, precision @ rank n:0.0, execution time: 0.0342s
One-class SVM (OCSVM) ROC:0.6189, precision @ rank n:0.25, execution time: 0.0014s
Principal Component Analysis (PCA) ROC:0.622, precision @ rank n:0.25, execution time: 0.0028s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9004, precision @ rank n:0.8214, execution time: 0.0552s
Cluster-based Local Outlier Factor ROC:0.8952, precision @ rank n:0.8036, execution time: 0.0419s




Feature Bagging ROC:0.8933, precision @ rank n:0.75, execution time: 0.0709s
Histogram-base Outlier Detection (HBOS) ROC:0.5195, precision @ rank n:0.3393, execution time: 0.0074s
Isolation Forest ROC:0.8294, precision @ rank n:0.6607, execution time: 0.3215s
K Nearest Neighbors (KNN) ROC:0.9134, precision @ rank n:0.8393, execution time: 0.0147s
Local Outlier Factor (LOF) ROC:0.8989, precision @ rank n:0.75, execution time: 0.0062s
Minimum Covariance Determinant (MCD) ROC:0.9399, precision @ rank n:0.8571, execution time: 0.0596s
One-class SVM (OCSVM) ROC:0.8372, precision @ rank n:0.7143, execution time: 0.0053s
Principal Component Analysis (PCA) ROC:0.7971, precision @ rank n:0.5893, execution time: 0.0035s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8465, precision @ rank n:0.275, execution time: 0.3473s
Cluster-based Local Outlier Factor ROC:0.7423, precision @ rank n:0.175, execution time: 0.0955s




Feature Bagging ROC:0.866, precision @ rank n:0.4, execution time: 0.6504s
Histogram-base Outlier Detection (HBOS) ROC:0.5728, precision @ rank n:0.125, execution time: 0.0098s
Isolation Forest ROC:0.5836, precision @ rank n:0.05, execution time: 0.4187s
K Nearest Neighbors (KNN) ROC:0.845, precision @ rank n:0.3, execution time: 0.1241s
Local Outlier Factor (LOF) ROC:0.8409, precision @ rank n:0.325, execution time: 0.0848s
Minimum Covariance Determinant (MCD) ROC:0.7499, precision @ rank n:0.075, execution time: 0.9183s
One-class SVM (OCSVM) ROC:0.5744, precision @ rank n:0.1, execution time: 0.0617s
Principal Component Analysis (PCA) ROC:0.48, precision @ rank n:0.05, execution time: 0.0032s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9382, precision @ rank n:0.4, execution time: 0.0227s
Cluster-based Local Outlier Factor ROC:0.9709, precision @ rank n:0.6, execution time: 0.039s
Feature Bagging ROC:0.9673, precision @ rank n:0.6, execution time: 0.0293s



Isolation Forest ROC:0.9855, precision @ rank n:0.6, execution time: 0.3031s
K Nearest Neighbors (KNN) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0068s
Local Outlier Factor (LOF) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0022s
Minimum Covariance Determinant (MCD) ROC:0.9164, precision @ rank n:0.6, execution time: 0.0327s
One-class SVM (OCSVM) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0013s
Principal Component Analysis (PCA) ROC:0.9818, precision @ rank n:0.8, execution time: 0.0022s

... Processing mnist.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.7813, precision @ rank n:0.3562, execution time: 6.6281s




Cluster-based Local Outlier Factor ROC:0.8447, precision @ rank n:0.4007, execution time: 0.5156s
Feature Bagging ROC:0.7259, precision @ rank n:0.3664, execution time: 47.386s
Histogram-base Outlier Detection (HBOS) ROC:0.5675, precision @ rank n:0.1199, execution time: 0.0419s
Isolation Forest ROC:0.7813, precision @ rank n:0.3116, execution time: 1.2084s
K Nearest Neighbors (KNN) ROC:0.8409, precision @ rank n:0.4144, execution time: 6.1721s
Local Outlier Factor (LOF) ROC:0.7085, precision @ rank n:0.339, execution time: 5.9689s




Minimum Covariance Determinant (MCD) ROC:0.863, precision @ rank n:0.3973, execution time: 1.6381s
One-class SVM (OCSVM) ROC:0.8417, precision @ rank n:0.3801, execution time: 4.3068s
Principal Component Analysis (PCA) ROC:0.8396, precision @ rank n:0.3767, execution time: 0.0592s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.0809, precision @ rank n:0.0333, execution time: 2.0415s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.1701s




Feature Bagging ROC:0.5228, precision @ rank n:0.1667, execution time: 12.5647s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9667, execution time: 0.0572s
Isolation Forest ROC:0.9992, precision @ rank n:0.9, execution time: 0.7536s
K Nearest Neighbors (KNN) ROC:0.7348, precision @ rank n:0.2333, execution time: 1.717s
Local Outlier Factor (LOF) ROC:0.5323, precision @ rank n:0.1333, execution time: 1.5719s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:0.9667, execution time: 6.5458s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.1813s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.064s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4428, precision @ rank n:0.0161, execution time: 2.1754s




Cluster-based Local Outlier Factor ROC:0.7852, precision @ rank n:0.0, execution time: 0.2579s
Feature Bagging ROC:0.4641, precision @ rank n:0.0484, execution time: 12.652s
Histogram-base Outlier Detection (HBOS) ROC:0.8822, precision @ rank n:0.2581, execution time: 0.0263s
Isolation Forest ROC:0.5442, precision @ rank n:0.0161, execution time: 0.7788s
K Nearest Neighbors (KNN) ROC:0.3824, precision @ rank n:0.0, execution time: 1.6282s
Local Outlier Factor (LOF) ROC:0.4584, precision @ rank n:0.0484, execution time: 1.5249s




Minimum Covariance Determinant (MCD) ROC:0.3486, precision @ rank n:0.0, execution time: 0.8743s
One-class SVM (OCSVM) ROC:0.4972, precision @ rank n:0.0, execution time: 1.1959s
Principal Component Analysis (PCA) ROC:0.504, precision @ rank n:0.0, execution time: 0.0203s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7008, precision @ rank n:0.0308, execution time: 1.2189s
Cluster-based Local Outlier Factor ROC:0.9609, precision @ rank n:0.3077, execution time: 0.1707s




Feature Bagging ROC:0.4687, precision @ rank n:0.0462, execution time: 3.4684s
Histogram-base Outlier Detection (HBOS) ROC:0.9294, precision @ rank n:0.2615, execution time: 0.0096s
Isolation Forest ROC:0.9482, precision @ rank n:0.2615, execution time: 0.5897s
K Nearest Neighbors (KNN) ROC:0.7602, precision @ rank n:0.0462, execution time: 0.4896s
Local Outlier Factor (LOF) ROC:0.481, precision @ rank n:0.0462, execution time: 0.4553s
Minimum Covariance Determinant (MCD) ROC:0.8271, precision @ rank n:0.0615, execution time: 1.7991s
One-class SVM (OCSVM) ROC:0.93, precision @ rank n:0.2923, execution time: 0.7097s
Principal Component Analysis (PCA) ROC:0.9332, precision @ rank n:0.3385, execution time: 0.0053s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6757, precision @ rank n:0.5106, execution time: 0.1171s
Cluster-based Local Outlier Factor ROC:0.684, precision @ rank n:0.4681, execution time: 0.063s




Feature Bagging ROC:0.6446, precision @ rank n:0.4468, execution time: 0.0864s
Histogram-base Outlier Detection (HBOS) ROC:0.7169, precision @ rank n:0.5213, execution time: 0.0024s
Isolation Forest ROC:0.6777, precision @ rank n:0.4787, execution time: 0.325s
K Nearest Neighbors (KNN) ROC:0.7252, precision @ rank n:0.5106, execution time: 0.0248s
Local Outlier Factor (LOF) ROC:0.6604, precision @ rank n:0.4787, execution time: 0.0088s
Minimum Covariance Determinant (MCD) ROC:0.7047, precision @ rank n:0.4787, execution time: 0.0481s
One-class SVM (OCSVM) ROC:0.6423, precision @ rank n:0.4574, execution time: 0.0068s
Principal Component Analysis (PCA) ROC:0.6639, precision @ rank n:0.5, execution time: 0.0017s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5681, precision @ rank n:0.3918, execution time: 1.549s
Cluster-based Local Outlier Factor ROC:0.7234, precision @ rank n:0.5574, execution time: 0.1876s




Feature Bagging ROC:0.557, precision @ rank n:0.4051, execution time: 5.5848s
Histogram-base Outlier Detection (HBOS) ROC:0.7393, precision @ rank n:0.5466, execution time: 0.0191s
Isolation Forest ROC:0.7094, precision @ rank n:0.578, execution time: 0.6481s
K Nearest Neighbors (KNN) ROC:0.6781, precision @ rank n:0.4994, execution time: 0.8517s
Local Outlier Factor (LOF) ROC:0.5551, precision @ rank n:0.4051, execution time: 0.7825s
Minimum Covariance Determinant (MCD) ROC:0.792, precision @ rank n:0.6747, execution time: 1.5436s
One-class SVM (OCSVM) ROC:0.636, precision @ rank n:0.5224, execution time: 1.0644s
Principal Component Analysis (PCA) ROC:0.5783, precision @ rank n:0.4559, execution time: 0.0124s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.86, precision @ rank n:0.2593, execution time: 1.3337s
Cluster-based Local Outlier Factor ROC:0.9987, precision @ rank n:0.8889, execution time: 0.1692s




Feature Bagging ROC:0.4971, precision @ rank n:0.0741, execution time: 4.8798s
Histogram-base Outlier Detection (HBOS) ROC:0.9837, precision @ rank n:0.5926, execution time: 0.0149s
Isolation Forest ROC:0.9973, precision @ rank n:0.8889, execution time: 0.6116s
K Nearest Neighbors (KNN) ROC:0.9505, precision @ rank n:0.3704, execution time: 0.7077s
Local Outlier Factor (LOF) ROC:0.5006, precision @ rank n:0.0741, execution time: 0.6159s
Minimum Covariance Determinant (MCD) ROC:0.9946, precision @ rank n:0.5185, execution time: 1.551s
One-class SVM (OCSVM) ROC:0.9976, precision @ rank n:0.9259, execution time: 0.8748s
Principal Component Analysis (PCA) ROC:0.9841, precision @ rank n:0.8519, execution time: 0.0121s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6186, precision @ rank n:0.1918, execution time: 12.1159s




Cluster-based Local Outlier Factor ROC:0.6286, precision @ rank n:0.2336, execution time: 0.4254s
Feature Bagging ROC:0.5211, precision @ rank n:0.111, execution time: 43.4471s
Histogram-base Outlier Detection (HBOS) ROC:0.9851, precision @ rank n:0.9857, execution time: 0.0168s
Isolation Forest ROC:0.9972, precision @ rank n:0.9337, execution time: 2.2366s
K Nearest Neighbors (KNN) ROC:0.645, precision @ rank n:0.2199, execution time: 7.0271s
Local Outlier Factor (LOF) ROC:0.5347, precision @ rank n:0.1406, execution time: 9.3012s






Minimum Covariance Determinant (MCD) ROC:0.9903, precision @ rank n:0.7534, execution time: 9.1995s
One-class SVM (OCSVM) ROC:0.9922, precision @ rank n:0.9553, execution time: 35.9928s
Principal Component Analysis (PCA) ROC:0.9902, precision @ rank n:0.9503, execution time: 0.0196s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2797, precision @ rank n:0.0, execution time: 0.036s
Cluster-based Local Outlier Factor ROC:0.3908, precision @ rank n:0.0, execution time: 0.0424s
Feature Bagging ROC:0.3027, precision @ rank n:0.0, execution time: 0.0323s
Histogram-base Outlier Detection (HBOS) ROC:0.2695, precision @ rank n:0.0, execution time: 0.0018s




Isolation Forest ROC:0.3576, precision @ rank n:0.0, execution time: 0.3052s
K Nearest Neighbors (KNN) ROC:0.318, precision @ rank n:0.0, execution time: 0.0089s
Local Outlier Factor (LOF) ROC:0.318, precision @ rank n:0.0, execution time: 0.0026s
Minimum Covariance Determinant (MCD) ROC:0.3308, precision @ rank n:0.0, execution time: 0.0436s
One-class SVM (OCSVM) ROC:0.4087, precision @ rank n:0.0, execution time: 0.0015s
Principal Component Analysis (PCA) ROC:0.3397, precision @ rank n:0.0, execution time: 0.0016s

... Processing vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9521, precision @ rank n:0.4706, execution time: 0.2279s
Cluster-based Local Outlier Factor ROC:0.9278, precision @ rank n:0.4118, execution time: 0.0701s




Feature Bagging ROC:0.9385, precision @ rank n:0.3529, execution time: 0.2394s
Histogram-base Outlier Detection (HBOS) ROC:0.6758, precision @ rank n:0.1765, execution time: 0.0037s
Isolation Forest ROC:0.7469, precision @ rank n:0.1176, execution time: 0.357s
K Nearest Neighbors (KNN) ROC:0.9568, precision @ rank n:0.5294, execution time: 0.061s
Local Outlier Factor (LOF) ROC:0.9345, precision @ rank n:0.4118, execution time: 0.0284s
Minimum Covariance Determinant (MCD) ROC:0.6779, precision @ rank n:0.0, execution time: 0.6659s
One-class SVM (OCSVM) ROC:0.7415, precision @ rank n:0.2941, execution time: 0.0278s
Principal Component Analysis (PCA) ROC:0.5787, precision @ rank n:0.1176, execution time: 0.0019s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9232, precision @ rank n:0.3, execution time: 0.0589s
Cluster-based Local Outlier Factor ROC:0.9063, precision @ rank n:0.6, execution time: 0.0551s




Feature Bagging ROC:0.9415, precision @ rank n:0.5, execution time: 0.0684s
Histogram-base Outlier Detection (HBOS) ROC:0.9592, precision @ rank n:0.7, execution time: 0.0078s
Isolation Forest ROC:0.9451, precision @ rank n:0.5, execution time: 0.3063s
K Nearest Neighbors (KNN) ROC:0.9437, precision @ rank n:0.5, execution time: 0.0145s
Local Outlier Factor (LOF) ROC:0.9352, precision @ rank n:0.4, execution time: 0.0067s
Minimum Covariance Determinant (MCD) ROC:0.8986, precision @ rank n:0.4, execution time: 0.0537s
One-class SVM (OCSVM) ROC:0.9408, precision @ rank n:0.5, execution time: 0.0043s
Principal Component Analysis (PCA) ROC:0.9324, precision @ rank n:0.6, execution time: 0.0036s


## ROC Dataframe

In [16]:
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7687,0.7684,0.7799,0.8511,0.8478,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5763,0.8221,0.4879,0.8453,0.9316,0.6959,0.4715,0.8778,0.9507,0.9638
0,glass,214,9,4.2056,0.7104,0.8506,0.7043,0.6524,0.7195,0.7805,0.7774,0.7165,0.6189,0.622
0,ionosphere,351,33,35.8974,0.9004,0.8952,0.8933,0.5195,0.8294,0.9134,0.8989,0.9399,0.8372,0.7971
0,letter,1600,32,6.25,0.8465,0.7423,0.866,0.5728,0.5836,0.845,0.8409,0.7499,0.5744,0.48
0,lympho,148,18,4.0541,0.9382,0.9709,0.9673,0.9964,0.9855,0.9636,0.9636,0.9164,0.9636,0.9818
0,mnist,7603,100,9.2069,0.7813,0.8447,0.7259,0.5675,0.7813,0.8409,0.7085,0.863,0.8417,0.8396
0,musk,3062,166,3.1679,0.0809,1.0,0.5228,0.9999,0.9992,0.7348,0.5323,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4428,0.7852,0.4641,0.8822,0.5442,0.3824,0.4584,0.3486,0.4972,0.504
0,pendigits,6870,16,2.2707,0.7008,0.9609,0.4687,0.9294,0.9482,0.7602,0.481,0.8271,0.93,0.9332


## Precision Dataframe

In [17]:
prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.5,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1875,0.4844,0.1406,0.4688,0.4531,0.2812,0.125,0.3906,0.5938,0.6875
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8214,0.8036,0.75,0.3393,0.6607,0.8393,0.75,0.8571,0.7143,0.5893
0,letter,1600,32,6.25,0.275,0.175,0.4,0.125,0.05,0.3,0.325,0.075,0.1,0.05
0,lympho,148,18,4.0541,0.4,0.6,0.6,0.8,0.6,0.6,0.6,0.6,0.6,0.8
0,mnist,7603,100,9.2069,0.3562,0.4007,0.3664,0.1199,0.3116,0.4144,0.339,0.3973,0.3801,0.3767
0,musk,3062,166,3.1679,0.0333,1.0,0.1667,0.9667,0.9,0.2333,0.1333,0.9667,1.0,1.0
0,optdigits,5216,64,2.8758,0.0161,0.0,0.0484,0.2581,0.0161,0.0,0.0484,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0308,0.3077,0.0462,0.2615,0.2615,0.0462,0.0462,0.0615,0.2923,0.3385


## Execution Time Dataframe

In [18]:
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,1.0545,1.0546,0.5049,1.4386,0.3801,0.0772,0.0665,0.4535,0.0415,0.0302
0,cardio,1831,21,9.6122,0.3329,0.1137,0.6269,0.0057,0.3772,0.1182,0.0795,0.4565,0.0591,0.0045
0,glass,214,9,4.2056,0.0328,0.0405,0.0332,0.0025,0.3004,0.0078,0.0024,0.0342,0.0014,0.0028
0,ionosphere,351,33,35.8974,0.0552,0.0419,0.0709,0.0074,0.3215,0.0147,0.0062,0.0596,0.0053,0.0035
0,letter,1600,32,6.25,0.3473,0.0955,0.6504,0.0098,0.4187,0.1241,0.0848,0.9183,0.0617,0.0032
0,lympho,148,18,4.0541,0.0227,0.039,0.0293,0.0041,0.3031,0.0068,0.0022,0.0327,0.0013,0.0022
0,mnist,7603,100,9.2069,6.6281,0.5156,47.386,0.0419,1.2084,6.1721,5.9689,1.6381,4.3068,0.0592
0,musk,3062,166,3.1679,2.0415,0.1701,12.5647,0.0572,0.7536,1.717,1.5719,6.5458,1.1813,0.064
0,optdigits,5216,64,2.8758,2.1754,0.2579,12.652,0.0263,0.7788,1.6282,1.5249,0.8743,1.1959,0.0203
0,pendigits,6870,16,2.2707,1.2189,0.1707,3.4684,0.0096,0.5897,0.4896,0.4553,1.7991,0.7097,0.0053
