In [1]:
import os
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat



# Import Pyod and the methods

In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging



# Import Metrics Package

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score


In [4]:
mat_file_list=['arrhythmia.mat','cardio.mat','glass.mat','ionosphere.mat','letter.mat','lympho.mat','mnist.mat','musk.mat','optdigits.mat','pendigits.mat','pima.mat','satellite.mat','satimage-2.mat','shuttle.mat','shuttle.mat','vertebral.mat','vowels.mat','wbc.mat']

In [5]:
mat_file_list

['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

# load Mat File

In [6]:
from scipy.io import loadmat

In [7]:
data=loadmat('data/cardio.mat')

In [8]:
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [9]:
len(data)

5

In [10]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [11]:
data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

# Input(Independent) Feature Shape in Mat file format

In [12]:
type(data['X']),data['X'].shape

(numpy.ndarray, (1831, 21))

# Dependent/ Target /Output Feature shape

In [13]:
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [14]:
df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc',
              'ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest', 'KNN', 'LOF', 'MCD',
              'OCSVM', 'PCA']

# ROC Performance evulotion table

In [15]:
roc_df = pd.DataFrame(columns=df_columns)

In [16]:
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


# precision_n_scores - Performance evulotion table

In [19]:
prn_df = pd.DataFrame(columns=df_columns)
prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


# Time dataframe

In [20]:
time_df = pd.DataFrame(columns=df_columns)
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


# Exploraing All Mat files

In [21]:
from time import time
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                        random_state=random_state)

    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
        contamination=outliers_fraction),
        'Cluster-based Local Outlier Factor': CBLOF(
            contamination=outliers_fraction, check_estimator=False,
            random_state=random_state),
        'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                          random_state=random_state),
        'Histogram-base Outlier Detection (HBOS)': HBOS(
            contamination=outliers_fraction),
        'Isolation Forest': IForest(contamination=outliers_fraction,
                                    random_state=random_state),
        'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
        'Local Outlier Factor (LOF)': LOF(
            contamination=outliers_fraction),
        'Minimum Covariance Determinant (MCD)': MCD(
            contamination=outliers_fraction, random_state=random_state),
        'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
        'Principal Component Analysis (PCA)': PCA(
            contamination=outliers_fraction, random_state=random_state),
    }

    for clf_name, clf in classifiers.items():
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
            clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)

    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 0.1201s




Cluster-based Local Outlier Factor ROC:0.7684, precision @ rank n:0.4643, execution time: 0.0887s
Feature Bagging ROC:0.7799, precision @ rank n:0.5, execution time: 0.5274s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 0.0469s
Isolation Forest ROC:0.8478, precision @ rank n:0.5357, execution time: 0.3629s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0748s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0619s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 0.5303s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0456s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0519s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5763, precision @ rank n:0.1875, execution time: 0.3744s
Cluster-based Local Outlier Factor ROC:0.8221, precision @ rank n:0.4844, execution time: 0.1353s




Feature Bagging ROC:0.4879, precision @ rank n:0.1406, execution time: 0.7584s
Histogram-base Outlier Detection (HBOS) ROC:0.8453, precision @ rank n:0.4688, execution time: 0.006s
Isolation Forest ROC:0.9316, precision @ rank n:0.4531, execution time: 0.333s
K Nearest Neighbors (KNN) ROC:0.6959, precision @ rank n:0.2812, execution time: 0.1195s
Local Outlier Factor (LOF) ROC:0.4715, precision @ rank n:0.125, execution time: 0.0938s




Minimum Covariance Determinant (MCD) ROC:0.8778, precision @ rank n:0.3906, execution time: 0.4738s
One-class SVM (OCSVM) ROC:0.9507, precision @ rank n:0.5938, execution time: 0.1032s
Principal Component Analysis (PCA) ROC:0.9638, precision @ rank n:0.6875, execution time: 0.003s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7104, precision @ rank n:0.25, execution time: 0.0369s




Cluster-based Local Outlier Factor ROC:0.8506, precision @ rank n:0.25, execution time: 0.0459s
Feature Bagging ROC:0.7043, precision @ rank n:0.25, execution time: 0.0359s
Histogram-base Outlier Detection (HBOS) ROC:0.6524, precision @ rank n:0.0, execution time: 0.003s
Isolation Forest ROC:0.7195, precision @ rank n:0.25, execution time: 0.2554s
K Nearest Neighbors (KNN) ROC:0.7805, precision @ rank n:0.25, execution time: 0.008s
Local Outlier Factor (LOF) ROC:0.7774, precision @ rank n:0.25, execution time: 0.003s
Minimum Covariance Determinant (MCD) ROC:0.7165, precision @ rank n:0.0, execution time: 0.0289s
One-class SVM (OCSVM) ROC:0.6189, precision @ rank n:0.25, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.622, precision @ rank n:0.25, execution time: 0.002s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9004, precision @ rank n:0.8214, execution time: 0.0808s




Cluster-based Local Outlier Factor ROC:0.8952, precision @ rank n:0.8036, execution time: 0.0568s
Feature Bagging ROC:0.8933, precision @ rank n:0.75, execution time: 0.0848s
Histogram-base Outlier Detection (HBOS) ROC:0.5195, precision @ rank n:0.3393, execution time: 0.0156s
Isolation Forest ROC:0.8294, precision @ rank n:0.6607, execution time: 0.2951s
K Nearest Neighbors (KNN) ROC:0.9134, precision @ rank n:0.8393, execution time: 0.014s
Local Outlier Factor (LOF) ROC:0.8989, precision @ rank n:0.75, execution time: 0.005s
Minimum Covariance Determinant (MCD) ROC:0.9399, precision @ rank n:0.8571, execution time: 0.0638s
One-class SVM (OCSVM) ROC:0.8372, precision @ rank n:0.7143, execution time: 0.004s
Principal Component Analysis (PCA) ROC:0.7971, precision @ rank n:0.5893, execution time: 0.002s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8465, precision @ rank n:0.275, execution time: 0.3367s
Cluster-based Local Outlier Factor ROC:0.7423, precision 



Feature Bagging ROC:0.866, precision @ rank n:0.4, execution time: 0.6938s
Histogram-base Outlier Detection (HBOS) ROC:0.5728, precision @ rank n:0.125, execution time: 0.001s
Isolation Forest ROC:0.5836, precision @ rank n:0.05, execution time: 0.3437s
K Nearest Neighbors (KNN) ROC:0.845, precision @ rank n:0.3, execution time: 0.1093s
Local Outlier Factor (LOF) ROC:0.8409, precision @ rank n:0.325, execution time: 0.0781s
Minimum Covariance Determinant (MCD) ROC:0.7499, precision @ rank n:0.075, execution time: 1.0679s
One-class SVM (OCSVM) ROC:0.5744, precision @ rank n:0.1, execution time: 0.0937s
Principal Component Analysis (PCA) ROC:0.48, precision @ rank n:0.05, execution time: 0.0s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9382, precision @ rank n:0.4, execution time: 0.0156s
Cluster-based Local Outlier Factor ROC:0.9709, precision @ rank n:0.6, execution time: 0.0312s
Feature Bagging ROC:0.9673, precision @ rank n:0.6, execution time: 0.0313s




Histogram-base Outlier Detection (HBOS) ROC:0.9964, precision @ rank n:0.8, execution time: 0.015s
Isolation Forest ROC:0.9855, precision @ rank n:0.6, execution time: 0.3012s
K Nearest Neighbors (KNN) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0s
Local Outlier Factor (LOF) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0s
Minimum Covariance Determinant (MCD) ROC:0.9164, precision @ rank n:0.6, execution time: 0.0313s
One-class SVM (OCSVM) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0s
Principal Component Analysis (PCA) ROC:0.9818, precision @ rank n:0.8, execution time: 0.0156s

... Processing mnist.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.7813, precision @ rank n:0.3562, execution time: 6.4872s




Cluster-based Local Outlier Factor ROC:0.8447, precision @ rank n:0.4007, execution time: 0.5746s
Feature Bagging ROC:0.7259, precision @ rank n:0.3664, execution time: 45.7333s
Histogram-base Outlier Detection (HBOS) ROC:0.5675, precision @ rank n:0.1199, execution time: 0.0399s
Isolation Forest ROC:0.7813, precision @ rank n:0.3116, execution time: 1.9078s
K Nearest Neighbors (KNN) ROC:0.8409, precision @ rank n:0.4144, execution time: 6.3023s
Local Outlier Factor (LOF) ROC:0.7085, precision @ rank n:0.339, execution time: 5.6043s




Minimum Covariance Determinant (MCD) ROC:0.863, precision @ rank n:0.3973, execution time: 2.5375s
One-class SVM (OCSVM) ROC:0.8417, precision @ rank n:0.3801, execution time: 3.7022s
Principal Component Analysis (PCA) ROC:0.8396, precision @ rank n:0.3767, execution time: 0.1176s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.0809, precision @ rank n:0.0333, execution time: 1.9202s




Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.208s
Feature Bagging ROC:0.5228, precision @ rank n:0.1667, execution time: 11.6599s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9667, execution time: 0.0489s
Isolation Forest ROC:0.9992, precision @ rank n:0.9, execution time: 0.943s
K Nearest Neighbors (KNN) ROC:0.7348, precision @ rank n:0.2333, execution time: 1.4699s
Local Outlier Factor (LOF) ROC:0.5323, precision @ rank n:0.1333, execution time: 1.3888s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:0.9667, execution time: 9.8289s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.0436s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.1237s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4428, precision @ rank n:0.0161, execution time: 2.1752s




Cluster-based Local Outlier Factor ROC:0.7852, precision @ rank n:0.0, execution time: 0.2682s
Feature Bagging ROC:0.4641, precision @ rank n:0.0484, execution time: 11.9614s
Histogram-base Outlier Detection (HBOS) ROC:0.8822, precision @ rank n:0.2581, execution time: 0.0269s
Isolation Forest ROC:0.5442, precision @ rank n:0.0161, execution time: 0.7431s
K Nearest Neighbors (KNN) ROC:0.3824, precision @ rank n:0.0, execution time: 1.5379s
Local Outlier Factor (LOF) ROC:0.4584, precision @ rank n:0.0484, execution time: 1.4239s




Minimum Covariance Determinant (MCD) ROC:0.3486, precision @ rank n:0.0, execution time: 1.1388s
One-class SVM (OCSVM) ROC:0.4972, precision @ rank n:0.0, execution time: 1.2072s
Principal Component Analysis (PCA) ROC:0.504, precision @ rank n:0.0, execution time: 0.0434s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7008, precision @ rank n:0.0308, execution time: 1.4831s




Cluster-based Local Outlier Factor ROC:0.9609, precision @ rank n:0.3077, execution time: 0.2593s
Feature Bagging ROC:0.4687, precision @ rank n:0.0462, execution time: 4.7927s
Histogram-base Outlier Detection (HBOS) ROC:0.9294, precision @ rank n:0.2615, execution time: 0.013s
Isolation Forest ROC:0.9482, precision @ rank n:0.2615, execution time: 0.7027s
K Nearest Neighbors (KNN) ROC:0.7602, precision @ rank n:0.0462, execution time: 0.6449s
Local Outlier Factor (LOF) ROC:0.481, precision @ rank n:0.0462, execution time: 0.5225s
Minimum Covariance Determinant (MCD) ROC:0.8271, precision @ rank n:0.0615, execution time: 1.9894s
One-class SVM (OCSVM) ROC:0.93, precision @ rank n:0.2923, execution time: 0.8339s
Principal Component Analysis (PCA) ROC:0.9332, precision @ rank n:0.3385, execution time: 0.007s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6757, precision @ rank n:0.5106, execution time: 0.185s
Cluster-based Local Outlier Factor ROC:0.684, precision 



Histogram-base Outlier Detection (HBOS) ROC:0.7169, precision @ rank n:0.5213, execution time: 0.003s
Isolation Forest ROC:0.6777, precision @ rank n:0.4787, execution time: 0.3306s
K Nearest Neighbors (KNN) ROC:0.7252, precision @ rank n:0.5106, execution time: 0.0339s
Local Outlier Factor (LOF) ROC:0.6604, precision @ rank n:0.4787, execution time: 0.009s
Minimum Covariance Determinant (MCD) ROC:0.7047, precision @ rank n:0.4787, execution time: 0.0608s
One-class SVM (OCSVM) ROC:0.6423, precision @ rank n:0.4574, execution time: 0.013s
Principal Component Analysis (PCA) ROC:0.6639, precision @ rank n:0.5, execution time: 0.002s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5681, precision @ rank n:0.3918, execution time: 1.6485s




Cluster-based Local Outlier Factor ROC:0.7234, precision @ rank n:0.5574, execution time: 0.2234s
Feature Bagging ROC:0.557, precision @ rank n:0.4051, execution time: 6.8119s
Histogram-base Outlier Detection (HBOS) ROC:0.7393, precision @ rank n:0.5466, execution time: 0.0239s
Isolation Forest ROC:0.7094, precision @ rank n:0.578, execution time: 0.9108s
K Nearest Neighbors (KNN) ROC:0.6781, precision @ rank n:0.4994, execution time: 0.9241s
Local Outlier Factor (LOF) ROC:0.5551, precision @ rank n:0.4051, execution time: 0.8553s
Minimum Covariance Determinant (MCD) ROC:0.792, precision @ rank n:0.6747, execution time: 1.9334s
One-class SVM (OCSVM) ROC:0.636, precision @ rank n:0.5224, execution time: 1.2766s
Principal Component Analysis (PCA) ROC:0.5783, precision @ rank n:0.4559, execution time: 0.0189s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.86, precision @ rank n:0.2593, execution time: 1.6295s




Cluster-based Local Outlier Factor ROC:0.9987, precision @ rank n:0.8889, execution time: 0.2304s
Feature Bagging ROC:0.4971, precision @ rank n:0.0741, execution time: 5.5044s
Histogram-base Outlier Detection (HBOS) ROC:0.9837, precision @ rank n:0.5926, execution time: 0.0139s
Isolation Forest ROC:0.9973, precision @ rank n:0.8889, execution time: 0.6483s
K Nearest Neighbors (KNN) ROC:0.9505, precision @ rank n:0.3704, execution time: 0.7181s
Local Outlier Factor (LOF) ROC:0.5006, precision @ rank n:0.0741, execution time: 0.6782s
Minimum Covariance Determinant (MCD) ROC:0.9946, precision @ rank n:0.5185, execution time: 1.9025s
One-class SVM (OCSVM) ROC:0.9976, precision @ rank n:0.9259, execution time: 1.0644s
Principal Component Analysis (PCA) ROC:0.9841, precision @ rank n:0.8519, execution time: 0.0239s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6186, precision @ rank n:0.1918, execution time: 14.2908s




Cluster-based Local Outlier Factor ROC:0.6286, precision @ rank n:0.2336, execution time: 0.6084s
Feature Bagging ROC:0.5211, precision @ rank n:0.111, execution time: 48.568s
Histogram-base Outlier Detection (HBOS) ROC:0.9851, precision @ rank n:0.9857, execution time: 0.023s
Isolation Forest ROC:0.9972, precision @ rank n:0.9337, execution time: 4.0562s
K Nearest Neighbors (KNN) ROC:0.645, precision @ rank n:0.2199, execution time: 9.0017s
Local Outlier Factor (LOF) ROC:0.5347, precision @ rank n:0.1406, execution time: 11.8303s






Minimum Covariance Determinant (MCD) ROC:0.9903, precision @ rank n:0.7534, execution time: 11.9686s
One-class SVM (OCSVM) ROC:0.9922, precision @ rank n:0.9553, execution time: 42.3464s
Principal Component Analysis (PCA) ROC:0.9902, precision @ rank n:0.9503, execution time: 0.0389s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6227, precision @ rank n:0.1938, execution time: 13.7536s




Cluster-based Local Outlier Factor ROC:0.6578, precision @ rank n:0.2262, execution time: 0.7037s
Feature Bagging ROC:0.5023, precision @ rank n:0.0669, execution time: 54.7988s
Histogram-base Outlier Detection (HBOS) ROC:0.9868, precision @ rank n:0.9386, execution time: 0.017s
Isolation Forest ROC:0.9975, precision @ rank n:0.9563, execution time: 2.7312s
K Nearest Neighbors (KNN) ROC:0.6549, precision @ rank n:0.2213, execution time: 9.2001s
Local Outlier Factor (LOF) ROC:0.5336, precision @ rank n:0.155, execution time: 10.6339s








Minimum Covariance Determinant (MCD) ROC:0.9897, precision @ rank n:0.7477, execution time: 11.2318s
One-class SVM (OCSVM) ROC:0.9923, precision @ rank n:0.9605, execution time: 40.7413s
Principal Component Analysis (PCA) ROC:0.9907, precision @ rank n:0.9577, execution time: 0.0339s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.3688, precision @ rank n:0.0, execution time: 0.0549s
Cluster-based Local Outlier Factor ROC:0.3891, precision @ rank n:0.0, execution time: 0.0479s




Feature Bagging ROC:0.3688, precision @ rank n:0.0, execution time: 0.0449s
Histogram-base Outlier Detection (HBOS) ROC:0.3617, precision @ rank n:0.0, execution time: 0.001s
Isolation Forest ROC:0.3539, precision @ rank n:0.0, execution time: 0.2673s
K Nearest Neighbors (KNN) ROC:0.3562, precision @ rank n:0.0, execution time: 0.009s
Local Outlier Factor (LOF) ROC:0.3562, precision @ rank n:0.0, execution time: 0.003s
Minimum Covariance Determinant (MCD) ROC:0.4203, precision @ rank n:0.0, execution time: 0.0389s
One-class SVM (OCSVM) ROC:0.4078, precision @ rank n:0.0, execution time: 0.002s
Principal Component Analysis (PCA) ROC:0.3633, precision @ rank n:0.0625, execution time: 0.0009s

... Processing vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8982, precision @ rank n:0.2857, execution time: 0.2853s
Cluster-based Local Outlier Factor ROC:0.8608, precision @ rank n:0.2143, execution time: 0.1017s




Feature Bagging ROC:0.9124, precision @ rank n:0.2143, execution time: 0.2934s
Histogram-base Outlier Detection (HBOS) ROC:0.7567, precision @ rank n:0.2143, execution time: 0.003s
Isolation Forest ROC:0.7758, precision @ rank n:0.2143, execution time: 0.3185s
K Nearest Neighbors (KNN) ROC:0.9746, precision @ rank n:0.3571, execution time: 0.0678s
Local Outlier Factor (LOF) ROC:0.9192, precision @ rank n:0.2857, execution time: 0.0339s
Minimum Covariance Determinant (MCD) ROC:0.6966, precision @ rank n:0.0714, execution time: 0.8462s
One-class SVM (OCSVM) ROC:0.8426, precision @ rank n:0.2857, execution time: 0.0319s
Principal Component Analysis (PCA) ROC:0.6907, precision @ rank n:0.2143, execution time: 0.002s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9573, precision @ rank n:0.5556, execution time: 0.0769s




Cluster-based Local Outlier Factor ROC:0.9751, precision @ rank n:0.6667, execution time: 0.0668s
Feature Bagging ROC:0.979, precision @ rank n:0.7778, execution time: 0.1142s
Histogram-base Outlier Detection (HBOS) ROC:0.9876, precision @ rank n:0.6667, execution time: 0.012s
Isolation Forest ROC:0.9674, precision @ rank n:0.5556, execution time: 0.2973s
K Nearest Neighbors (KNN) ROC:0.9713, precision @ rank n:0.4444, execution time: 0.016s
Local Outlier Factor (LOF) ROC:0.979, precision @ rank n:0.6667, execution time: 0.006s
Minimum Covariance Determinant (MCD) ROC:0.9658, precision @ rank n:0.5556, execution time: 0.0549s
One-class SVM (OCSVM) ROC:0.9782, precision @ rank n:0.6667, execution time: 0.008s
Principal Component Analysis (PCA) ROC:0.9775, precision @ rank n:0.6667, execution time: 0.004s


In [22]:
roc_df


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7687,0.7684,0.7799,0.8511,0.8478,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5763,0.8221,0.4879,0.8453,0.9316,0.6959,0.4715,0.8778,0.9507,0.9638
0,glass,214,9,4.2056,0.7104,0.8506,0.7043,0.6524,0.7195,0.7805,0.7774,0.7165,0.6189,0.622
0,ionosphere,351,33,35.8974,0.9004,0.8952,0.8933,0.5195,0.8294,0.9134,0.8989,0.9399,0.8372,0.7971
0,letter,1600,32,6.25,0.8465,0.7423,0.866,0.5728,0.5836,0.845,0.8409,0.7499,0.5744,0.48
0,lympho,148,18,4.0541,0.9382,0.9709,0.9673,0.9964,0.9855,0.9636,0.9636,0.9164,0.9636,0.9818
0,mnist,7603,100,9.2069,0.7813,0.8447,0.7259,0.5675,0.7813,0.8409,0.7085,0.863,0.8417,0.8396
0,musk,3062,166,3.1679,0.0809,1.0,0.5228,0.9999,0.9992,0.7348,0.5323,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4428,0.7852,0.4641,0.8822,0.5442,0.3824,0.4584,0.3486,0.4972,0.504
0,pendigits,6870,16,2.2707,0.7008,0.9609,0.4687,0.9294,0.9482,0.7602,0.481,0.8271,0.93,0.9332


In [23]:
prn_df


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.5,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1875,0.4844,0.1406,0.4688,0.4531,0.2812,0.125,0.3906,0.5938,0.6875
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8214,0.8036,0.75,0.3393,0.6607,0.8393,0.75,0.8571,0.7143,0.5893
0,letter,1600,32,6.25,0.275,0.175,0.4,0.125,0.05,0.3,0.325,0.075,0.1,0.05
0,lympho,148,18,4.0541,0.4,0.6,0.6,0.8,0.6,0.6,0.6,0.6,0.6,0.8
0,mnist,7603,100,9.2069,0.3562,0.4007,0.3664,0.1199,0.3116,0.4144,0.339,0.3973,0.3801,0.3767
0,musk,3062,166,3.1679,0.0333,1.0,0.1667,0.9667,0.9,0.2333,0.1333,0.9667,1.0,1.0
0,optdigits,5216,64,2.8758,0.0161,0.0,0.0484,0.2581,0.0161,0.0,0.0484,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0308,0.3077,0.0462,0.2615,0.2615,0.0462,0.0462,0.0615,0.2923,0.3385


In [24]:
time_df


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.1201,0.0887,0.5274,0.0469,0.3629,0.0748,0.0619,0.5303,0.0456,0.0519
0,cardio,1831,21,9.6122,0.3744,0.1353,0.7584,0.006,0.333,0.1195,0.0938,0.4738,0.1032,0.003
0,glass,214,9,4.2056,0.0369,0.0459,0.0359,0.003,0.2554,0.008,0.003,0.0289,0.001,0.002
0,ionosphere,351,33,35.8974,0.0808,0.0568,0.0848,0.0156,0.2951,0.014,0.005,0.0638,0.004,0.002
0,letter,1600,32,6.25,0.3367,0.0925,0.6938,0.001,0.3437,0.1093,0.0781,1.0679,0.0937,0.0
0,lympho,148,18,4.0541,0.0156,0.0312,0.0313,0.015,0.3012,0.0,0.0,0.0313,0.0,0.0156
0,mnist,7603,100,9.2069,6.4872,0.5746,45.7333,0.0399,1.9078,6.3023,5.6043,2.5375,3.7022,0.1176
0,musk,3062,166,3.1679,1.9202,0.208,11.6599,0.0489,0.943,1.4699,1.3888,9.8289,1.0436,0.1237
0,optdigits,5216,64,2.8758,2.1752,0.2682,11.9614,0.0269,0.7431,1.5379,1.4239,1.1388,1.2072,0.0434
0,pendigits,6870,16,2.2707,1.4831,0.2593,4.7927,0.013,0.7027,0.6449,0.5225,1.9894,0.8339,0.007
