#                                                        Project 1

In [1]:
import os 
import sys
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

Import Pyod packages

In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging



Import matrice packages

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [4]:
mat_file_list=['arrhythmia','cardio.mat','glass.mat','ionosphere.mat','letter.mat','lympho.mat','mnist.mat',
               'musk.mat','optdigits.mat','pendigits.mat','pima.mat','satellite.mat','satimage-2.mat','shuttle.mat'
               ,'shuttle.mat','vertebral.mat','vowels.mat','wbc.mat']
mat_file_list

['arrhythmia',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

Loading Mat file

In [5]:
data=loadmat('Anamoly_detec_data/cardio.mat')

In [6]:
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [7]:
len(data)

5

In [8]:
data.keys()


dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [9]:
data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

Input(Independent) Features in MAT file

In [10]:
type(data['X']),data['X'].shape

(numpy.ndarray, (1831, 21))

Dependent / Target / Output Feature shape

In [11]:
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [12]:
df_columns=['Data','#Sample','#Dimensions','Outlier Perc','PCA','MCD','OCSVM',
            'LOF','CBLOF','KNN','HBOS','ABOD','IFOREST','FEATUREBAGGING']

Precision Time and Roc evolution tables creation

In [13]:
roc_df=pd.DataFrame(columns=df_columns)
prn_df=pd.DataFrame(columns=df_columns)
time_df=pd.DataFrame(columns=df_columns)
print(roc_df,prn_df,time_df)

Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: []


Exploring Mat files

In [14]:
from time import time
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('Anamoly_detec_data/', mat_file))

    X = mat['X']
    y = mat['y'].ravel()
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

   # construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

   # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                       random_state=random_state)

   # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
       contamination=outliers_fraction),
       'Cluster-based Local Outlier Factor': CBLOF(
           contamination=outliers_fraction, check_estimator=False,
           random_state=random_state),
       'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                         random_state=random_state),
       'Histogram-base Outlier Detection (HBOS)': HBOS(
           contamination=outliers_fraction),
       'Isolation Forest': IForest(contamination=outliers_fraction,
                                   random_state=random_state),
       'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
       'Local Outlier Factor (LOF)': LOF(
           contamination=outliers_fraction),
       'Minimum Covariance Determinant (MCD)': MCD(
           contamination=outliers_fraction, random_state=random_state),
       'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
       'Principal Component Analysis (PCA)': PCA(
           contamination=outliers_fraction, random_state=random_state),
   }

    for clf_name, clf in classifiers.items():
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
           clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)

    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing arrhythmia ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 18.638s




Cluster-based Local Outlier Factor ROC:0.7684, precision @ rank n:0.4643, execution time: 12.3635s
Feature Bagging ROC:0.7799, precision @ rank n:0.5, execution time: 2.513s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 7.9579s
Isolation Forest ROC:0.8478, precision @ rank n:0.5357, execution time: 2.4276s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.375s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.2656s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 12.1065s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.125s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 3.2024s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5763, precision @ rank n:0.1875, execution time: 2.1762s




Cluster-based Local Outlier Factor ROC:0.8221, precision @ rank n:0.4844, execution time: 1.238s
Feature Bagging ROC:0.4879, precision @ rank n:0.1406, execution time: 5.4637s
Histogram-base Outlier Detection (HBOS) ROC:0.8453, precision @ rank n:0.4688, execution time: 0.052s
Isolation Forest ROC:0.9316, precision @ rank n:0.4531, execution time: 2.086s
K Nearest Neighbors (KNN) ROC:0.6959, precision @ rank n:0.2812, execution time: 0.862s
Local Outlier Factor (LOF) ROC:0.4715, precision @ rank n:0.125, execution time: 0.436s




Minimum Covariance Determinant (MCD) ROC:0.8226, precision @ rank n:0.4062, execution time: 3.6772s
One-class SVM (OCSVM) ROC:0.9507, precision @ rank n:0.5938, execution time: 0.2344s
Principal Component Analysis (PCA) ROC:0.9638, precision @ rank n:0.6875, execution time: 0.1993s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7104, precision @ rank n:0.25, execution time: 0.227s




Cluster-based Local Outlier Factor ROC:0.8506, precision @ rank n:0.25, execution time: 0.298s
Feature Bagging ROC:0.7043, precision @ rank n:0.25, execution time: 0.162s
Histogram-base Outlier Detection (HBOS) ROC:0.6524, precision @ rank n:0.0, execution time: 0.01s
Isolation Forest ROC:0.7195, precision @ rank n:0.25, execution time: 1.04s
K Nearest Neighbors (KNN) ROC:0.7805, precision @ rank n:0.25, execution time: 0.027s
Local Outlier Factor (LOF) ROC:0.7774, precision @ rank n:0.25, execution time: 0.007s
Minimum Covariance Determinant (MCD) ROC:0.7165, precision @ rank n:0.0, execution time: 0.726s
One-class SVM (OCSVM) ROC:0.6189, precision @ rank n:0.25, execution time: 0.015s
Principal Component Analysis (PCA) ROC:0.622, precision @ rank n:0.25, execution time: 0.171s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9004, precision @ rank n:0.8214, execution time: 0.4s




Cluster-based Local Outlier Factor ROC:0.8952, precision @ rank n:0.8036, execution time: 0.383s
Feature Bagging ROC:0.8933, precision @ rank n:0.75, execution time: 0.292s
Histogram-base Outlier Detection (HBOS) ROC:0.5195, precision @ rank n:0.3393, execution time: 0.054s
Isolation Forest ROC:0.8294, precision @ rank n:0.6607, execution time: 1.268s
K Nearest Neighbors (KNN) ROC:0.9134, precision @ rank n:0.8393, execution time: 0.092s
Local Outlier Factor (LOF) ROC:0.8989, precision @ rank n:0.75, execution time: 0.017s
Minimum Covariance Determinant (MCD) ROC:0.9399, precision @ rank n:0.8571, execution time: 0.8319s
One-class SVM (OCSVM) ROC:0.8372, precision @ rank n:0.7143, execution time: 0.011s
Principal Component Analysis (PCA) ROC:0.7971, precision @ rank n:0.5893, execution time: 0.0699s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8465, precision @ rank n:0.275, execution time: 2.3784s




Cluster-based Local Outlier Factor ROC:0.7423, precision @ rank n:0.175, execution time: 0.5999s
Feature Bagging ROC:0.866, precision @ rank n:0.4, execution time: 3.5289s
Histogram-base Outlier Detection (HBOS) ROC:0.5728, precision @ rank n:0.125, execution time: 0.063s
Isolation Forest ROC:0.5836, precision @ rank n:0.05, execution time: 2.0143s
K Nearest Neighbors (KNN) ROC:0.845, precision @ rank n:0.3, execution time: 0.6664s
Local Outlier Factor (LOF) ROC:0.8409, precision @ rank n:0.325, execution time: 0.4076s
Minimum Covariance Determinant (MCD) ROC:0.7499, precision @ rank n:0.075, execution time: 7.2715s
One-class SVM (OCSVM) ROC:0.5744, precision @ rank n:0.1, execution time: 0.1719s
Principal Component Analysis (PCA) ROC:0.48, precision @ rank n:0.05, execution time: 0.0625s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9382, precision @ rank n:0.4, execution time: 0.0937s
Cluster-based Local Outlier Factor ROC:0.9709, precision @ rank n:0.6, ex



Feature Bagging ROC:0.9673, precision @ rank n:0.6, execution time: 0.072s
Histogram-base Outlier Detection (HBOS) ROC:0.9964, precision @ rank n:0.8, execution time: 0.012s
Isolation Forest ROC:0.9855, precision @ rank n:0.6, execution time: 1.061s
K Nearest Neighbors (KNN) ROC:0.9636, precision @ rank n:0.6, execution time: 0.035s
Local Outlier Factor (LOF) ROC:0.9636, precision @ rank n:0.6, execution time: 0.006s
Minimum Covariance Determinant (MCD) ROC:0.9164, precision @ rank n:0.6, execution time: 0.47s
One-class SVM (OCSVM) ROC:0.9636, precision @ rank n:0.6, execution time: 0.004s
Principal Component Analysis (PCA) ROC:0.9818, precision @ rank n:0.8, execution time: 0.052s

... Processing mnist.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7813, precision @ rank n:0.3562, execution time: 41.5927s




Cluster-based Local Outlier Factor ROC:0.8447, precision @ rank n:0.4007, execution time: 3.609s
Feature Bagging ROC:0.7259, precision @ rank n:0.3664, execution time: 388.2172s
Histogram-base Outlier Detection (HBOS) ROC:0.5675, precision @ rank n:0.1199, execution time: 0.275s
Isolation Forest ROC:0.7813, precision @ rank n:0.3116, execution time: 20.64s
K Nearest Neighbors (KNN) ROC:0.8409, precision @ rank n:0.4144, execution time: 42.6872s
Local Outlier Factor (LOF) ROC:0.7085, precision @ rank n:0.339, execution time: 54.9887s




Minimum Covariance Determinant (MCD) ROC:0.863, precision @ rank n:0.3973, execution time: 58.931s
One-class SVM (OCSVM) ROC:0.8417, precision @ rank n:0.3801, execution time: 16.91s
Principal Component Analysis (PCA) ROC:0.8396, precision @ rank n:0.3767, execution time: 1.039s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.0809, precision @ rank n:0.0333, execution time: 15.769s




Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 7.524s
Feature Bagging ROC:0.5228, precision @ rank n:0.1667, execution time: 94.2616s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9667, execution time: 0.58s
Isolation Forest ROC:0.9992, precision @ rank n:0.9, execution time: 15.966s
K Nearest Neighbors (KNN) ROC:0.7348, precision @ rank n:0.2333, execution time: 10.584s
Local Outlier Factor (LOF) ROC:0.5323, precision @ rank n:0.1333, execution time: 10.083s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:0.9667, execution time: 214.4342s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 2.505s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.479s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4428, precision @ rank n:0.0161, execution time: 13.1502s




Cluster-based Local Outlier Factor ROC:0.7852, precision @ rank n:0.0, execution time: 1.359s
Feature Bagging ROC:0.4641, precision @ rank n:0.0484, execution time: 62.9381s
Histogram-base Outlier Detection (HBOS) ROC:0.8822, precision @ rank n:0.2581, execution time: 0.182s
Isolation Forest ROC:0.5442, precision @ rank n:0.0161, execution time: 4.294s
K Nearest Neighbors (KNN) ROC:0.3824, precision @ rank n:0.0, execution time: 9.335s
Local Outlier Factor (LOF) ROC:0.4584, precision @ rank n:0.0484, execution time: 8.708s




Minimum Covariance Determinant (MCD) ROC:0.3486, precision @ rank n:0.0, execution time: 81.069s
One-class SVM (OCSVM) ROC:0.4972, precision @ rank n:0.0, execution time: 5.818s
Principal Component Analysis (PCA) ROC:0.504, precision @ rank n:0.0, execution time: 0.164s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7008, precision @ rank n:0.0308, execution time: 7.42s




Cluster-based Local Outlier Factor ROC:0.9609, precision @ rank n:0.3077, execution time: 1.241s
Feature Bagging ROC:0.4687, precision @ rank n:0.0462, execution time: 22.54s
Histogram-base Outlier Detection (HBOS) ROC:0.9294, precision @ rank n:0.2615, execution time: 0.035s
Isolation Forest ROC:0.9482, precision @ rank n:0.2615, execution time: 3.86s
K Nearest Neighbors (KNN) ROC:0.7602, precision @ rank n:0.0462, execution time: 3.153s
Local Outlier Factor (LOF) ROC:0.481, precision @ rank n:0.0462, execution time: 3.675s
Minimum Covariance Determinant (MCD) ROC:0.8271, precision @ rank n:0.0615, execution time: 56.805s
One-class SVM (OCSVM) ROC:0.93, precision @ rank n:0.2923, execution time: 3.379s
Principal Component Analysis (PCA) ROC:0.9332, precision @ rank n:0.3385, execution time: 0.045s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6757, precision @ rank n:0.5106, execution time: 1.417s




Cluster-based Local Outlier Factor ROC:0.684, precision @ rank n:0.4681, execution time: 0.547s
Feature Bagging ROC:0.6446, precision @ rank n:0.4468, execution time: 0.291s
Histogram-base Outlier Detection (HBOS) ROC:0.7169, precision @ rank n:0.5213, execution time: 0.006s
Isolation Forest ROC:0.6777, precision @ rank n:0.4787, execution time: 0.958s
K Nearest Neighbors (KNN) ROC:0.7252, precision @ rank n:0.5106, execution time: 0.114s
Local Outlier Factor (LOF) ROC:0.6604, precision @ rank n:0.4787, execution time: 0.027s
Minimum Covariance Determinant (MCD) ROC:0.7047, precision @ rank n:0.4787, execution time: 0.581s
One-class SVM (OCSVM) ROC:0.6423, precision @ rank n:0.4574, execution time: 0.056s
Principal Component Analysis (PCA) ROC:0.6639, precision @ rank n:0.5, execution time: 0.078s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5681, precision @ rank n:0.3918, execution time: 11.574s




Cluster-based Local Outlier Factor ROC:0.7234, precision @ rank n:0.5574, execution time: 23.731s
Feature Bagging ROC:0.557, precision @ rank n:0.4051, execution time: 26.2529s
Histogram-base Outlier Detection (HBOS) ROC:0.7393, precision @ rank n:0.5466, execution time: 0.067s
Isolation Forest ROC:0.7094, precision @ rank n:0.578, execution time: 4.4058s
K Nearest Neighbors (KNN) ROC:0.6781, precision @ rank n:0.4994, execution time: 3.907s
Local Outlier Factor (LOF) ROC:0.5551, precision @ rank n:0.4051, execution time: 3.1563s
Minimum Covariance Determinant (MCD) ROC:0.792, precision @ rank n:0.6747, execution time: 63.9748s
One-class SVM (OCSVM) ROC:0.636, precision @ rank n:0.5224, execution time: 4.457s
Principal Component Analysis (PCA) ROC:0.5783, precision @ rank n:0.4559, execution time: 0.121s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.86, precision @ rank n:0.2593, execution time: 7.633s




Cluster-based Local Outlier Factor ROC:0.9987, precision @ rank n:0.8889, execution time: 1.168s
Feature Bagging ROC:0.4971, precision @ rank n:0.0741, execution time: 21.4345s
Histogram-base Outlier Detection (HBOS) ROC:0.9837, precision @ rank n:0.5926, execution time: 0.051s
Isolation Forest ROC:0.9973, precision @ rank n:0.8889, execution time: 2.036s
K Nearest Neighbors (KNN) ROC:0.9505, precision @ rank n:0.3704, execution time: 2.413s
Local Outlier Factor (LOF) ROC:0.5006, precision @ rank n:0.0741, execution time: 1.989s
Minimum Covariance Determinant (MCD) ROC:0.9946, precision @ rank n:0.5185, execution time: 50.431s
One-class SVM (OCSVM) ROC:0.9976, precision @ rank n:0.9259, execution time: 2.515s
Principal Component Analysis (PCA) ROC:0.9841, precision @ rank n:0.8519, execution time: 0.097s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6186, precision @ rank n:0.1918, execution time: 61.213s




Cluster-based Local Outlier Factor ROC:0.6286, precision @ rank n:0.2336, execution time: 74.61s
Feature Bagging ROC:0.5211, precision @ rank n:0.111, execution time: 289.419s
Histogram-base Outlier Detection (HBOS) ROC:0.9851, precision @ rank n:0.9857, execution time: 0.244s
Isolation Forest ROC:0.9972, precision @ rank n:0.9337, execution time: 20.528s
K Nearest Neighbors (KNN) ROC:0.645, precision @ rank n:0.2199, execution time: 64.3341s
Local Outlier Factor (LOF) ROC:0.5347, precision @ rank n:0.1406, execution time: 67.5642s






Minimum Covariance Determinant (MCD) ROC:0.9903, precision @ rank n:0.7534, execution time: 182.4515s
One-class SVM (OCSVM) ROC:0.9922, precision @ rank n:0.9553, execution time: 282.3092s
Principal Component Analysis (PCA) ROC:0.9902, precision @ rank n:0.9503, execution time: 2.377s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6227, precision @ rank n:0.1938, execution time: 96.3355s




Cluster-based Local Outlier Factor ROC:0.6578, precision @ rank n:0.2262, execution time: 59.9268s
Feature Bagging ROC:0.5023, precision @ rank n:0.0669, execution time: 278.4678s
Histogram-base Outlier Detection (HBOS) ROC:0.9868, precision @ rank n:0.9386, execution time: 0.0625s
Isolation Forest ROC:0.9975, precision @ rank n:0.9563, execution time: 12.1869s
K Nearest Neighbors (KNN) ROC:0.6549, precision @ rank n:0.2213, execution time: 46.2706s
Local Outlier Factor (LOF) ROC:0.5336, precision @ rank n:0.155, execution time: 61.1354s






Minimum Covariance Determinant (MCD) ROC:0.9897, precision @ rank n:0.7477, execution time: 140.5098s
One-class SVM (OCSVM) ROC:0.9923, precision @ rank n:0.9605, execution time: 279.741s
Principal Component Analysis (PCA) ROC:0.9907, precision @ rank n:0.9577, execution time: 1.9278s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.3688, precision @ rank n:0.0, execution time: 0.2273s




Cluster-based Local Outlier Factor ROC:0.3891, precision @ rank n:0.0, execution time: 9.6211s
Feature Bagging ROC:0.3688, precision @ rank n:0.0, execution time: 0.212s
Histogram-base Outlier Detection (HBOS) ROC:0.3617, precision @ rank n:0.0, execution time: 0.007s
Isolation Forest ROC:0.3539, precision @ rank n:0.0, execution time: 1.2736s
K Nearest Neighbors (KNN) ROC:0.3562, precision @ rank n:0.0, execution time: 0.031s
Local Outlier Factor (LOF) ROC:0.3562, precision @ rank n:0.0, execution time: 0.012s
Minimum Covariance Determinant (MCD) ROC:0.4203, precision @ rank n:0.0, execution time: 10.6685s
One-class SVM (OCSVM) ROC:0.4078, precision @ rank n:0.0, execution time: 0.005s
Principal Component Analysis (PCA) ROC:0.3633, precision @ rank n:0.0625, execution time: 0.199s

... Processing vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8982, precision @ rank n:0.2857, execution time: 1.357s




Cluster-based Local Outlier Factor ROC:0.8608, precision @ rank n:0.2143, execution time: 8.5005s
Feature Bagging ROC:0.9124, precision @ rank n:0.2143, execution time: 1.179s
Histogram-base Outlier Detection (HBOS) ROC:0.7567, precision @ rank n:0.2143, execution time: 0.024s
Isolation Forest ROC:0.7758, precision @ rank n:0.2143, execution time: 1.699s
K Nearest Neighbors (KNN) ROC:0.9746, precision @ rank n:0.3571, execution time: 0.376s
Local Outlier Factor (LOF) ROC:0.9192, precision @ rank n:0.2857, execution time: 0.142s
Minimum Covariance Determinant (MCD) ROC:0.6966, precision @ rank n:0.0714, execution time: 22.3273s
One-class SVM (OCSVM) ROC:0.8426, precision @ rank n:0.2857, execution time: 0.0937s
Principal Component Analysis (PCA) ROC:0.6907, precision @ rank n:0.2143, execution time: 0.0313s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9573, precision @ rank n:0.5556, execution time: 0.2303s




Cluster-based Local Outlier Factor ROC:0.9751, precision @ rank n:0.6667, execution time: 0.297s
Feature Bagging ROC:0.979, precision @ rank n:0.7778, execution time: 0.179s
Histogram-base Outlier Detection (HBOS) ROC:0.9876, precision @ rank n:0.6667, execution time: 0.021s
Isolation Forest ROC:0.9674, precision @ rank n:0.5556, execution time: 0.734s
K Nearest Neighbors (KNN) ROC:0.9713, precision @ rank n:0.4444, execution time: 0.051s
Local Outlier Factor (LOF) ROC:0.979, precision @ rank n:0.6667, execution time: 0.018s
Minimum Covariance Determinant (MCD) ROC:0.9658, precision @ rank n:0.5556, execution time: 0.586s
One-class SVM (OCSVM) ROC:0.9782, precision @ rank n:0.6667, execution time: 0.011s
Principal Component Analysis (PCA) ROC:0.9775, precision @ rank n:0.6667, execution time: 0.049s


In [15]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhyt,452,274,14.6018,0.7687,0.7684,0.7799,0.8511,0.8478,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5763,0.8221,0.4879,0.8453,0.9316,0.6959,0.4715,0.8226,0.9507,0.9638
0,glass,214,9,4.2056,0.7104,0.8506,0.7043,0.6524,0.7195,0.7805,0.7774,0.7165,0.6189,0.622
0,ionosphere,351,33,35.8974,0.9004,0.8952,0.8933,0.5195,0.8294,0.9134,0.8989,0.9399,0.8372,0.7971
0,letter,1600,32,6.25,0.8465,0.7423,0.866,0.5728,0.5836,0.845,0.8409,0.7499,0.5744,0.48
0,lympho,148,18,4.0541,0.9382,0.9709,0.9673,0.9964,0.9855,0.9636,0.9636,0.9164,0.9636,0.9818
0,mnist,7603,100,9.2069,0.7813,0.8447,0.7259,0.5675,0.7813,0.8409,0.7085,0.863,0.8417,0.8396
0,musk,3062,166,3.1679,0.0809,1.0,0.5228,0.9999,0.9992,0.7348,0.5323,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4428,0.7852,0.4641,0.8822,0.5442,0.3824,0.4584,0.3486,0.4972,0.504
0,pendigits,6870,16,2.2707,0.7008,0.9609,0.4687,0.9294,0.9482,0.7602,0.481,0.8271,0.93,0.9332


In [16]:
prn_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhyt,452,274,14.6018,0.3571,0.4643,0.5,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1875,0.4844,0.1406,0.4688,0.4531,0.2812,0.125,0.4062,0.5938,0.6875
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8214,0.8036,0.75,0.3393,0.6607,0.8393,0.75,0.8571,0.7143,0.5893
0,letter,1600,32,6.25,0.275,0.175,0.4,0.125,0.05,0.3,0.325,0.075,0.1,0.05
0,lympho,148,18,4.0541,0.4,0.6,0.6,0.8,0.6,0.6,0.6,0.6,0.6,0.8
0,mnist,7603,100,9.2069,0.3562,0.4007,0.3664,0.1199,0.3116,0.4144,0.339,0.3973,0.3801,0.3767
0,musk,3062,166,3.1679,0.0333,1.0,0.1667,0.9667,0.9,0.2333,0.1333,0.9667,1.0,1.0
0,optdigits,5216,64,2.8758,0.0161,0.0,0.0484,0.2581,0.0161,0.0,0.0484,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0308,0.3077,0.0462,0.2615,0.2615,0.0462,0.0462,0.0615,0.2923,0.3385


In [17]:
time_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhyt,452,274,14.6018,18.638,12.3635,2.513,7.9579,2.4276,0.375,0.2656,12.1065,0.125,3.2024
0,cardio,1831,21,9.6122,2.1762,1.238,5.4637,0.052,2.086,0.862,0.436,3.6772,0.2344,0.1993
0,glass,214,9,4.2056,0.227,0.298,0.162,0.01,1.04,0.027,0.007,0.726,0.015,0.171
0,ionosphere,351,33,35.8974,0.4,0.383,0.292,0.054,1.268,0.092,0.017,0.8319,0.011,0.0699
0,letter,1600,32,6.25,2.3784,0.5999,3.5289,0.063,2.0143,0.6664,0.4076,7.2715,0.1719,0.0625
0,lympho,148,18,4.0541,0.0937,0.119,0.072,0.012,1.061,0.035,0.006,0.47,0.004,0.052
0,mnist,7603,100,9.2069,41.5927,3.609,388.217,0.275,20.64,42.6872,54.9887,58.931,16.91,1.039
0,musk,3062,166,3.1679,15.769,7.524,94.2616,0.58,15.966,10.584,10.083,214.434,2.505,0.479
0,optdigits,5216,64,2.8758,13.1502,1.359,62.9381,0.182,4.294,9.335,8.708,81.069,5.818,0.164
0,pendigits,6870,16,2.2707,7.42,1.241,22.54,0.035,3.86,3.153,3.675,56.805,3.379,0.045
