In [36]:

import os
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


In [37]:
from scipy.io import loadmat

In [38]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

### Import Metrics Package

In [39]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

#### Defining the data files

In [40]:
# Benchmark datasets: one MATLAB .mat file per dataset, processed in this order.
mat_file_list = [
    'arrhythmia.mat',
    'cardio.mat',
    'glass.mat',
    'ionosphere.mat',
    'letter.mat',
    'lympho.mat',
    'mnist.mat',
    'musk.mat',
    'optdigits.mat',
    'pendigits.mat',
    'pima.mat',
    'satellite.mat',
    'satimage-2.mat',
    'shuttle.mat',
    'vertebral.mat',
    'vowels.mat',
    'wbc.mat',
]
mat_file_list

['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

In [41]:
# NOTE(review): this cell repeats the dataset list from the previous cell
# verbatim; it only rebinds mat_file_list to an equal list, so one of the two
# cells could be dropped.
mat_file_list = [
    'arrhythmia.mat', 'cardio.mat', 'glass.mat', 'ionosphere.mat',
    'letter.mat', 'lympho.mat', 'mnist.mat', 'musk.mat',
    'optdigits.mat', 'pendigits.mat', 'pima.mat', 'satellite.mat',
    'satimage-2.mat', 'shuttle.mat', 'vertebral.mat', 'vowels.mat',
    'wbc.mat',
]
mat_file_list

['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

In [42]:
# Load a single dataset (cardio) to inspect how the .mat files are laid out.
data = loadmat(
    r'C:\Users\ASUS\Anamoly_detec_data\cardio.mat'
)
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [43]:
# Number of top-level entries in the dict returned by loadmat.
len(data)

5

In [44]:
# Names of the entries in the loaded dict: MAT-file metadata plus 'X' and 'y'.
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [45]:
# Values stored under each key (header bytes, version string, globals, and the 'X' / 'y' arrays).
data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

In [46]:

# Container type and shape of the 'X' array (used as the feature matrix by the benchmark loop).
type(data['X']),data['X'].shape

(numpy.ndarray, (1831, 21))

In [47]:
# Container type and shape of the 'y' array (a single-column 2-D array; the benchmark loop flattens it with ravel()).
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [48]:
# Column layout shared by the ROC, precision@n and timing result tables:
# four dataset-description columns followed by one column per detector.
df_columns = [
    # dataset description
    'Data', '#Samples', '# Dimensions', 'Outlier Perc',
    # one score column per detector
    'ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest',
    'KNN', 'LOF', 'MCD', 'OCSVM', 'PCA',
]


#### ROC DataFrame



In [49]:
# Empty table for per-dataset ROC AUC scores; displayed to confirm the column layout.
roc_df = pd.DataFrame(columns=df_columns)
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


In [50]:
# Empty table for per-dataset precision @ rank n scores; displayed to confirm the column layout.
prn_df = pd.DataFrame(columns=df_columns)
prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


### Time DataFrame



In [51]:
# Empty table for per-dataset execution times (seconds); displayed to confirm the column layout.
time_df = pd.DataFrame(columns=df_columns)
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


In [52]:
# Directory that holds the benchmark .mat files.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
matfiles = r'C:\Users\ASUS\Anamoly_detec_data'

# Exploring MAT Files

In [53]:
from time import time

# Benchmark every detector on every dataset.
#
# For each .mat file: load X / y, split 60/40, standardize, fit each detector
# on the training part, score the test part, and record ROC AUC,
# precision @ rank n and wall-clock time (fit + scoring).
#
# The seeded RandomState is re-created each time this cell runs and is shared
# by the train/test split and every detector that accepts a random_state.
random_state = np.random.RandomState(42)

# One record per dataset, laid out exactly like `df_columns`. The result
# tables are built once after the loop instead of being grown with pd.concat
# on every iteration; that way re-running this cell replaces the tables rather
# than appending duplicate rows, rows get a proper 0..n-1 index, and numeric
# columns keep numeric dtypes.
roc_rows = []
prn_rows = []
time_rows = []

for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    # Read from the configured data directory instead of repeating the path.
    mat = loadmat(os.path.join(matfiles, mat_file))

    X = mat['X']
    y = mat['y'].ravel()  # flatten the label array to 1-D
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # Leading description columns shared by all three result rows; the
    # dataset name is the file name without its extension.
    dataset_name = os.path.splitext(mat_file)[0]
    dataset_info = [dataset_name, X.shape[0], X.shape[1], outliers_percentage]
    roc_list = list(dataset_info)
    prn_list = list(dataset_info)
    time_list = list(dataset_info)

    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    # Insertion order matters: it must match the detector columns of
    # `df_columns` (ABOD, CBLOF, FB, HBOS, IForest, KNN, LOF, MCD, OCSVM, PCA),
    # because scores are appended to the rows positionally.
    classifiers = {
        'Angle-based Outlier Detector (ABOD)': ABOD(
            contamination=outliers_fraction),
        'Cluster-based Local Outlier Factor': CBLOF(
            contamination=outliers_fraction, check_estimator=False,
            random_state=random_state),
        'Feature Bagging': FeatureBagging(
            contamination=outliers_fraction, random_state=random_state),
        'Histogram-base Outlier Detection (HBOS)': HBOS(
            contamination=outliers_fraction),
        'Isolation Forest': IForest(
            contamination=outliers_fraction, random_state=random_state),
        'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
        'Local Outlier Factor (LOF)': LOF(
            contamination=outliers_fraction),
        'Minimum Covariance Determinant (MCD)': MCD(
            contamination=outliers_fraction, random_state=random_state),
        'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
        'Principal Component Analysis (PCA)': PCA(
            contamination=outliers_fraction, random_state=random_state),
    }

    for clf_name, clf in classifiers.items():
        # Time covers fitting on the training part plus scoring the test part.
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
                  clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    time_rows.append(time_list)
    roc_rows.append(roc_list)
    prn_rows.append(prn_list)

# Build each result table in one shot. pandas raises here if a row does not
# have exactly len(df_columns) entries, so a mismatch between the detector
# dict and the column list is caught instead of silently misaligning scores.
time_df = pd.DataFrame(time_rows, columns=df_columns)
roc_df = pd.DataFrame(roc_rows, columns=df_columns)
prn_df = pd.DataFrame(prn_rows, columns=df_columns)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 0.1017s
Cluster-based Local Outlier Factor ROC:0.7789, precision @ rank n:0.4643, execution time: 0.0848s
Feature Bagging ROC:0.7796, precision @ rank n:0.4643, execution time: 0.406s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 0.0379s




Isolation Forest ROC:0.8637, precision @ rank n:0.6071, execution time: 0.2064s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0598s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0489s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 0.4039s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0339s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0409s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5892, precision @ rank n:0.1918, execution time: 0.2464s
Cluster-based Local Outlier Factor ROC:0.8845, precision @ rank n:0.4932, execution time: 0.0857s
Feature Bagging ROC:0.6385, precision @ rank n:0.1781, execution time: 0.5655s
Histogram-base Outlier Detection (HBOS) ROC:0.8373, precision @ rank n:0.4521, execution time: 0.004s
Isolation Forest ROC:0.951, precision @ rank n:0.6027, execution time: 0.1971s




K Nearest Neighbors (KNN) ROC:0.734, precision @ rank n:0.3562, execution time: 0.0978s
Local Outlier Factor (LOF) ROC:0.588, precision @ rank n:0.1507, execution time: 0.0638s




Minimum Covariance Determinant (MCD) ROC:0.811, precision @ rank n:0.4658, execution time: 0.373s
One-class SVM (OCSVM) ROC:0.9478, precision @ rank n:0.5342, execution time: 0.0539s
Principal Component Analysis (PCA) ROC:0.9616, precision @ rank n:0.6849, execution time: 0.003s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6951, precision @ rank n:0.25, execution time: 0.0229s
Cluster-based Local Outlier Factor ROC:0.811, precision @ rank n:0.25, execution time: 0.0219s
Feature Bagging ROC:0.7073, precision @ rank n:0.25, execution time: 0.0209s
Histogram-base Outlier Detection (HBOS) ROC:0.7073, precision @ rank n:0.0, execution time: 0.002s




Isolation Forest ROC:0.7134, precision @ rank n:0.25, execution time: 0.1376s
K Nearest Neighbors (KNN) ROC:0.8384, precision @ rank n:0.25, execution time: 0.006s
Local Outlier Factor (LOF) ROC:0.7043, precision @ rank n:0.25, execution time: 0.002s
Minimum Covariance Determinant (MCD) ROC:0.8293, precision @ rank n:0.0, execution time: 0.0269s
One-class SVM (OCSVM) ROC:0.6585, precision @ rank n:0.25, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.686, precision @ rank n:0.25, execution time: 0.001s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9181, precision @ rank n:0.8431, execution time: 0.0409s
Cluster-based Local Outlier Factor ROC:0.9176, precision @ rank n:0.8039, execution time: 0.0299s
Feature Bagging ROC:0.9303, precision @ rank n:0.8039, execution time: 0.0429s
Histogram-base Outlier Detection (HBOS) ROC:0.6052, precision @ rank n:0.3922, execution time: 0.005s




Isolation Forest ROC:0.8516, precision @ rank n:0.6078, execution time: 0.1526s
K Nearest Neighbors (KNN) ROC:0.932, precision @ rank n:0.8824, execution time: 0.011s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.7843, execution time: 0.005s
Minimum Covariance Determinant (MCD) ROC:0.9669, precision @ rank n:0.8627, execution time: 0.0449s
One-class SVM (OCSVM) ROC:0.8257, precision @ rank n:0.6863, execution time: 0.004s
Principal Component Analysis (PCA) ROC:0.7941, precision @ rank n:0.5686, execution time: 0.002s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8783, precision @ rank n:0.4375, execution time: 0.2394s
Cluster-based Local Outlier Factor ROC:0.7783, precision @ rank n:0.1875, execution time: 0.0908s
Feature Bagging ROC:0.8947, precision @ rank n:0.4062, execution time: 0.5057s
Histogram-base Outlier Detection (HBOS) ROC:0.6063, precision @ rank n:0.0938, execution time: 0.0056s




Isolation Forest ROC:0.6279, precision @ rank n:0.0625, execution time: 0.2084s
K Nearest Neighbors (KNN) ROC:0.8573, precision @ rank n:0.3125, execution time: 0.0947s
Local Outlier Factor (LOF) ROC:0.8765, precision @ rank n:0.3438, execution time: 0.0608s
Minimum Covariance Determinant (MCD) ROC:0.8061, precision @ rank n:0.1875, execution time: 0.7181s
One-class SVM (OCSVM) ROC:0.5927, precision @ rank n:0.125, execution time: 0.0549s
Principal Component Analysis (PCA) ROC:0.5216, precision @ rank n:0.125, execution time: 0.003s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9831, precision @ rank n:0.0, execution time: 0.017s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.0219s
Feature Bagging ROC:1.0, precision @ rank n:1.0, execution time: 0.018s
Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.002s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 0.1307s
K Nearest Neighbors (KNN) ROC:1.0, precision @ rank n:1.0, execution time: 0.0039s
Local Outlier Factor (LOF) ROC:1.0, precision @ rank n:1.0, execution time: 0.001s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 0.0219s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.0s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.002s

... Processing mnist.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7628, precision @ rank n:0.3367, execution time: 4.7543s
Cluster-based Local Outlier Factor ROC:0.8389, precision @ rank n:0.3912, execution time: 0.6931s
Feature Bagging ROC:0.7157, precision @ rank n:0.3741, execution time: 32.2278s
Histogram-base Outlier Detection (HBOS) ROC:0.5766, precision @ rank n:0.1361, execution time: 0.0369s




Isolation Forest ROC:0.7915, precision @ rank n:0.2687, execution time: 1.2138s
K Nearest Neighbors (KNN) ROC:0.8498, precision @ rank n:0.432, execution time: 4.7563s
Local Outlier Factor (LOF) ROC:0.7195, precision @ rank n:0.3673, execution time: 4.7871s




Minimum Covariance Determinant (MCD) ROC:0.8713, precision @ rank n:0.2653, execution time: 1.9318s
One-class SVM (OCSVM) ROC:0.854, precision @ rank n:0.3946, execution time: 3.2832s
Principal Component Analysis (PCA) ROC:0.8534, precision @ rank n:0.3878, execution time: 0.0928s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2161, precision @ rank n:0.1, execution time: 1.494s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.2184s
Feature Bagging ROC:0.473, precision @ rank n:0.125, execution time: 8.8024s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.975, execution time: 0.0369s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 0.7251s
K Nearest Neighbors (KNN) ROC:0.8009, precision @ rank n:0.175, execution time: 1.1579s
Local Outlier Factor (LOF) ROC:0.4629, precision @ rank n:0.125, execution time: 1.1429s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 7.9727s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.9545s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.1007s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4894, precision @ rank n:0.0152, execution time: 1.6815s
Cluster-based Local Outlier Factor ROC:0.7901, precision @ rank n:0.0, execution time: 0.3371s
Feature Bagging ROC:0.5062, precision @ rank n:0.0303, execution time: 9.1056s
Histogram-base Outlier Detection (HBOS) ROC:0.8774, precision @ rank n:0.2121, execution time: 0.0209s




Isolation Forest ROC:0.686, precision @ rank n:0.0303, execution time: 0.5595s
K Nearest Neighbors (KNN) ROC:0.406, precision @ rank n:0.0, execution time: 1.1998s
Local Outlier Factor (LOF) ROC:0.5277, precision @ rank n:0.0303, execution time: 1.1759s




Minimum Covariance Determinant (MCD) ROC:0.3822, precision @ rank n:0.0, execution time: 1.0799s
One-class SVM (OCSVM) ROC:0.5171, precision @ rank n:0.0, execution time: 1.0781s
Principal Component Analysis (PCA) ROC:0.526, precision @ rank n:0.0, execution time: 0.0309s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.667, precision @ rank n:0.0526, execution time: 0.9984s
Cluster-based Local Outlier Factor ROC:0.8082, precision @ rank n:0.1579, execution time: 0.1656s
Feature Bagging ROC:0.4889, precision @ rank n:0.0526, execution time: 3.1027s
Histogram-base Outlier Detection (HBOS) ROC:0.9348, precision @ rank n:0.2632, execution time: 0.007s




Isolation Forest ROC:0.939, precision @ rank n:0.3333, execution time: 0.4708s
K Nearest Neighbors (KNN) ROC:0.7371, precision @ rank n:0.0702, execution time: 0.4149s
Local Outlier Factor (LOF) ROC:0.4965, precision @ rank n:0.0702, execution time: 0.4198s
Minimum Covariance Determinant (MCD) ROC:0.8204, precision @ rank n:0.0877, execution time: 1.4661s
One-class SVM (OCSVM) ROC:0.9235, precision @ rank n:0.3158, execution time: 0.7351s
Principal Component Analysis (PCA) ROC:0.9309, precision @ rank n:0.3158, execution time: 0.0059s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7163, precision @ rank n:0.5253, execution time: 0.1007s
Cluster-based Local Outlier Factor ROC:0.67, precision @ rank n:0.4949, execution time: 0.0638s
Feature Bagging ROC:0.6448, precision @ rank n:0.4444, execution time: 0.0759s
Histogram-base Outlier Detection (HBOS) ROC:0.711, precision @ rank n:0.5354, execution time: 0.002s




Isolation Forest ROC:0.6829, precision @ rank n:0.5253, execution time: 0.1775s
K Nearest Neighbors (KNN) ROC:0.7395, precision @ rank n:0.5859, execution time: 0.023s
Local Outlier Factor (LOF) ROC:0.6574, precision @ rank n:0.4646, execution time: 0.008s
Minimum Covariance Determinant (MCD) ROC:0.7175, precision @ rank n:0.5152, execution time: 0.0399s
One-class SVM (OCSVM) ROC:0.6561, precision @ rank n:0.5051, execution time: 0.008s
Principal Component Analysis (PCA) ROC:0.6762, precision @ rank n:0.5354, execution time: 0.001s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5653, precision @ rank n:0.3962, execution time: 1.2607s
Cluster-based Local Outlier Factor ROC:0.7241, precision @ rank n:0.5412, execution time: 0.2922s
Feature Bagging ROC:0.572, precision @ rank n:0.4, execution time: 6.0019s
Histogram-base Outlier Detection (HBOS) ROC:0.7486, precision @ rank n:0.57, execution time: 0.014s




Isolation Forest ROC:0.6838, precision @ rank n:0.5812, execution time: 0.5396s
K Nearest Neighbors (KNN) ROC:0.6853, precision @ rank n:0.4988, execution time: 0.7121s
Local Outlier Factor (LOF) ROC:0.572, precision @ rank n:0.395, execution time: 0.7001s
Minimum Covariance Determinant (MCD) ROC:0.8055, precision @ rank n:0.6762, execution time: 1.6237s
One-class SVM (OCSVM) ROC:0.6478, precision @ rank n:0.5225, execution time: 1.0462s
Principal Component Analysis (PCA) ROC:0.5923, precision @ rank n:0.465, execution time: 0.0169s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8432, precision @ rank n:0.2333, execution time: 1.126s
Cluster-based Local Outlier Factor ROC:0.9998, precision @ rank n:0.9333, execution time: 0.2433s
Feature Bagging ROC:0.5235, precision @ rank n:0.1667, execution time: 4.8909s
Histogram-base Outlier Detection (HBOS) ROC:0.9784, precision @ rank n:0.6, execution time: 0.013s




Isolation Forest ROC:0.9955, precision @ rank n:0.8667, execution time: 0.4548s
K Nearest Neighbors (KNN) ROC:0.9515, precision @ rank n:0.4333, execution time: 0.5874s
Local Outlier Factor (LOF) ROC:0.5257, precision @ rank n:0.1667, execution time: 0.5485s
Minimum Covariance Determinant (MCD) ROC:0.9963, precision @ rank n:0.6667, execution time: 1.3963s
One-class SVM (OCSVM) ROC:0.9997, precision @ rank n:0.9, execution time: 0.8717s
Principal Component Analysis (PCA) ROC:0.9816, precision @ rank n:0.7333, execution time: 0.0139s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6171, precision @ rank n:0.2003, execution time: 10.1658s
Cluster-based Local Outlier Factor ROC:0.6273, precision @ rank n:0.2025, execution time: 0.9797s
Feature Bagging ROC:0.4725, precision @ rank n:0.0257, execution time: 49.4509s
Histogram-base Outlier Detection (HBOS) ROC:0.9871, precision @ rank n:0.9985, execution time: 0.012s




Isolation Forest ROC:0.9976, precision @ rank n:0.9501, execution time: 3.7469s
K Nearest Neighbors (KNN) ROC:0.6507, precision @ rank n:0.212, execution time: 5.3776s
Local Outlier Factor (LOF) ROC:0.5556, precision @ rank n:0.1548, execution time: 7.3224s






Minimum Covariance Determinant (MCD) ROC:0.9899, precision @ rank n:0.7395, execution time: 8.3128s
One-class SVM (OCSVM) ROC:0.9934, precision @ rank n:0.956, execution time: 31.4947s
Principal Component Analysis (PCA) ROC:0.9915, precision @ rank n:0.9516, execution time: 0.0219s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5366, precision @ rank n:0.2143, execution time: 0.0319s
Cluster-based Local Outlier Factor ROC:0.439, precision @ rank n:0.0714, execution time: 0.0299s
Feature Bagging ROC:0.5279, precision @ rank n:0.1429, execution time: 0.0239s
Histogram-base Outlier Detection (HBOS) ROC:0.3506, precision @ rank n:0.0, execution time: 0.002s




Isolation Forest ROC:0.3789, precision @ rank n:0.0, execution time: 0.1586s
K Nearest Neighbors (KNN) ROC:0.4573, precision @ rank n:0.0714, execution time: 0.007s
Local Outlier Factor (LOF) ROC:0.4983, precision @ rank n:0.1429, execution time: 0.002s
Minimum Covariance Determinant (MCD) ROC:0.4085, precision @ rank n:0.0714, execution time: 0.0319s
One-class SVM (OCSVM) ROC:0.4686, precision @ rank n:0.0714, execution time: 0.002s
Principal Component Analysis (PCA) ROC:0.4085, precision @ rank n:0.0, execution time: 0.001s

... Processing vowels.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.9616, precision @ rank n:0.6316, execution time: 0.2074s
Cluster-based Local Outlier Factor ROC:0.8963, precision @ rank n:0.3158, execution time: 0.0698s
Feature Bagging ROC:0.9365, precision @ rank n:0.3684, execution time: 0.2464s
Histogram-base Outlier Detection (HBOS) ROC:0.6876, precision @ rank n:0.1579, execution time: 0.003s




Isolation Forest ROC:0.8214, precision @ rank n:0.1579, execution time: 0.2264s
K Nearest Neighbors (KNN) ROC:0.9734, precision @ rank n:0.4737, execution time: 0.0519s
Local Outlier Factor (LOF) ROC:0.9398, precision @ rank n:0.3684, execution time: 0.025s
Minimum Covariance Determinant (MCD) ROC:0.7243, precision @ rank n:0.1053, execution time: 0.5934s
One-class SVM (OCSVM) ROC:0.8163, precision @ rank n:0.2632, execution time: 0.0329s
Principal Component Analysis (PCA) ROC:0.6297, precision @ rank n:0.1579, execution time: 0.001s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.921, precision @ rank n:0.375, execution time: 0.0618s
Cluster-based Local Outlier Factor ROC:0.9149, precision @ rank n:0.375, execution time: 0.0449s
Feature Bagging ROC:0.9271, precision @ rank n:0.375, execution time: 0.0658s
Histogram-base Outlier Detection (HBOS) ROC:0.9479, precision @ rank n:0.5, execution time: 0.007s




Isolation Forest ROC:0.9418, precision @ rank n:0.625, execution time: 0.1925s
K Nearest Neighbors (KNN) ROC:0.9444, precision @ rank n:0.5, execution time: 0.015s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.375, execution time: 0.007s
Minimum Covariance Determinant (MCD) ROC:0.9288, precision @ rank n:0.5, execution time: 0.0499s
One-class SVM (OCSVM) ROC:0.9358, precision @ rank n:0.375, execution time: 0.004s
Principal Component Analysis (PCA) ROC:0.9262, precision @ rank n:0.375, execution time: 0.002s


In [54]:
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7687,0.7789,0.7796,0.8511,0.8637,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5892,0.8845,0.6385,0.8373,0.951,0.734,0.588,0.811,0.9478,0.9616
0,glass,214,9,4.2056,0.6951,0.811,0.7073,0.7073,0.7134,0.8384,0.7043,0.8293,0.6585,0.686
0,ionosphere,351,33,35.8974,0.9181,0.9176,0.9303,0.6052,0.8516,0.932,0.9227,0.9669,0.8257,0.7941
0,letter,1600,32,6.25,0.8783,0.7783,0.8947,0.6063,0.6279,0.8573,0.8765,0.8061,0.5927,0.5216
0,lympho,148,18,4.0541,0.9831,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.7628,0.8389,0.7157,0.5766,0.7915,0.8498,0.7195,0.8713,0.854,0.8534
0,musk,3062,166,3.1679,0.2161,1.0,0.473,0.9999,1.0,0.8009,0.4629,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4894,0.7901,0.5062,0.8774,0.686,0.406,0.5277,0.3822,0.5171,0.526
0,pendigits,6870,16,2.2707,0.667,0.8082,0.4889,0.9348,0.939,0.7371,0.4965,0.8204,0.9235,0.9309


In [55]:
prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.4643,0.5714,0.6071,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1918,0.4932,0.1781,0.4521,0.6027,0.3562,0.1507,0.4658,0.5342,0.6849
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8431,0.8039,0.8039,0.3922,0.6078,0.8824,0.7843,0.8627,0.6863,0.5686
0,letter,1600,32,6.25,0.4375,0.1875,0.4062,0.0938,0.0625,0.3125,0.3438,0.1875,0.125,0.125
0,lympho,148,18,4.0541,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.3367,0.3912,0.3741,0.1361,0.2687,0.432,0.3673,0.2653,0.3946,0.3878
0,musk,3062,166,3.1679,0.1,1.0,0.125,0.975,1.0,0.175,0.125,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.0152,0.0,0.0303,0.2121,0.0303,0.0,0.0303,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0526,0.1579,0.0526,0.2632,0.3333,0.0702,0.0702,0.0877,0.3158,0.3158


In [56]:
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.1017,0.0848,0.406,0.0379,0.2064,0.0598,0.0489,0.4039,0.0339,0.0409
0,cardio,1831,21,9.6122,0.2464,0.0857,0.5655,0.004,0.1971,0.0978,0.0638,0.373,0.0539,0.003
0,glass,214,9,4.2056,0.0229,0.0219,0.0209,0.002,0.1376,0.006,0.002,0.0269,0.001,0.001
0,ionosphere,351,33,35.8974,0.0409,0.0299,0.0429,0.005,0.1526,0.011,0.005,0.0449,0.004,0.002
0,letter,1600,32,6.25,0.2394,0.0908,0.5057,0.0056,0.2084,0.0947,0.0608,0.7181,0.0549,0.003
0,lympho,148,18,4.0541,0.017,0.0219,0.018,0.002,0.1307,0.0039,0.001,0.0219,0.0,0.002
0,mnist,7603,100,9.2069,4.7543,0.6931,32.2278,0.0369,1.2138,4.7563,4.7871,1.9318,3.2832,0.0928
0,musk,3062,166,3.1679,1.494,0.2184,8.8024,0.0369,0.7251,1.1579,1.1429,7.9727,0.9545,0.1007
0,optdigits,5216,64,2.8758,1.6815,0.3371,9.1056,0.0209,0.5595,1.1998,1.1759,1.0799,1.0781,0.0309
0,pendigits,6870,16,2.2707,0.9984,0.1656,3.1027,0.007,0.4708,0.4149,0.4198,1.4661,0.7351,0.0059
