# Import Python packages


In [1]:
import os
import sys
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from scipy.io import loadmat

# Import Pyod packages and the methods

In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [4]:
mat_file_list = ['arrhythmia.mat',
                 'cardio.mat',
                 'glass.mat',
                 'ionosphere.mat',
                 'letter.mat',
                 'lympho.mat',
                 'mnist.mat',
                 'musk.mat',
                 'optdigits.mat',
                 'pendigits.mat',
                 'pima.mat',
                 'satellite.mat',
                 'satimage-2.mat',
                 'shuttle.mat',
                 'vertebral.mat',
                 'vowels.mat',
                 'wbc.mat']

In [5]:
data = loadmat('data\cardio.mat')
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [6]:
len(data)

5

In [7]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [8]:
data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

In [9]:
type(data['X']),data['X'].shape

(numpy.ndarray, (1831, 21))

In [10]:
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [11]:
df_columns = ['Data','#Samples','#Dimensions','Outlier Perc','ABOD', 'CBLOF','FB','HBOS','IForest','KNN','LOF','MCD'
             'OCSVM','PCA']

roc_df = pd.DataFrame(columns = df_columns)
prn_df = pd.DataFrame(columns = df_columns)
time_df = pd.DataFrame(columns = df_columns)


from time import time
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data',mat_file))
    
    X = mat['X']
    y = mat['y'].ravel()
    outliers_fraction = np.count_nonzero(y)/len(y)
    outliers_percentage = round(outliers_fraction * 100,ndigits = 4)
    
    #construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    
    #60% data fro training and 40% data for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state = random_state)
    
    #standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)
    
    classifiers = {'Angle-based Outlier Detector(ABOD)' : ABOD(contamination=outliers_fraction),
                  'Cluster-based Local Outlier Fraction': CBLOF(contamination=outliers_fraction,check_estimator = False,
                                                               random_state = random_state),
                  'Histogram-base Outlier Detection (HBOS)' : HBOS(contamination=outliers_fraction),
                  'Isolation Forest':IForest(contamination=outliers_fraction,random_state = random_state),
                  'K Nearest Neighbour (KNN)' : KNN(contamination=outliers_fraction),
                  'Local Outlier Factor (LOF)' : LOF(contamination=outliers_fraction),
                  'Minimum Covariance Determinant (MCD)' : MCD(contamination=outliers_fraction,random_state = random_state),
                  'One-class SVM (OCSVM)' : OCSVM(contamination=outliers_fraction),
                  'Principal Component Analysis (PCA)' : PCA(contamination=outliers_fraction, random_state = random_state),
                  }
    for clf_name, clf in classifiers.items():
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1-t0,ndigits = 4)
        time_list.append(duration)
        
        roc = round(roc_auc_score(y_test,test_scores), ndigits = 4)
        prn = round(precision_n_scores(y_test,test_scores), ndigits = 4)
        
        print('{clf_name} ROC : {roc},precision @ rank n {prn} , '
              'execution time : {duration}s'.format(clf_name = clf_name,roc = roc,prn = prn,duration = duration))
        
        roc_list.append(roc)
        prn_list.append(prn)
        
    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns 
    time_df = pd.concat([time_df,temp_df],axis = 0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df,temp_df],axis = 0)
                        
    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df,temp_df],axis = 0)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.7687,precision @ rank n 0.3571 , execution time : 3.547s
Cluster-based Local Outlier Fraction ROC : 0.7789,precision @ rank n 0.4643 , execution time : 3.1095s
Histogram-base Outlier Detection (HBOS) ROC : 0.8511,precision @ rank n 0.5714 , execution time : 2.6876s




Isolation Forest ROC : 0.8625,precision @ rank n 0.6071 , execution time : 0.7188s
K Nearest Neighbour (KNN) ROC : 0.782,precision @ rank n 0.5 , execution time : 0.1719s
Local Outlier Factor (LOF) ROC : 0.7787,precision @ rank n 0.4643 , execution time : 0.1094s




Minimum Covariance Determinant (MCD) ROC : 0.8228,precision @ rank n 0.4286 , execution time : 1.5313s
One-class SVM (OCSVM) ROC : 0.7986,precision @ rank n 0.5 , execution time : 0.0781s
Principal Component Analysis (PCA) ROC : 0.8,precision @ rank n 0.5 , execution time : 0.0938s

... Processing cardio.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.5615,precision @ rank n 0.1791 , execution time : 0.9063s
Cluster-based Local Outlier Fraction ROC : 0.8414,precision @ rank n 0.5522 , execution time : 0.2813s
Histogram-base Outlier Detection (HBOS) ROC : 0.8394,precision @ rank n 0.4925 , execution time : 0.0156s




Isolation Forest ROC : 0.9281,precision @ rank n 0.4925 , execution time : 0.5156s
K Nearest Neighbour (KNN) ROC : 0.7296,precision @ rank n 0.3582 , execution time : 0.2813s
Local Outlier Factor (LOF) ROC : 0.5641,precision @ rank n 0.1791 , execution time : 0.2344s




Minimum Covariance Determinant (MCD) ROC : 0.8295,precision @ rank n 0.4179 , execution time : 1.1876s
One-class SVM (OCSVM) ROC : 0.9341,precision @ rank n 0.5075 , execution time : 0.1563s
Principal Component Analysis (PCA) ROC : 0.9512,precision @ rank n 0.5821 , execution time : 0.1563s

... Processing glass.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.6951,precision @ rank n 0.25 , execution time : 0.0938s
Cluster-based Local Outlier Fraction ROC : 0.811,precision @ rank n 0.25 , execution time : 0.0781s
Histogram-base Outlier Detection (HBOS) ROC : 0.7073,precision @ rank n 0.0 , execution time : 0.0s




Isolation Forest ROC : 0.7165,precision @ rank n 0.25 , execution time : 0.3438s
K Nearest Neighbour (KNN) ROC : 0.8384,precision @ rank n 0.25 , execution time : 0.0156s
Local Outlier Factor (LOF) ROC : 0.7043,precision @ rank n 0.25 , execution time : 0.0156s
Minimum Covariance Determinant (MCD) ROC : 0.8293,precision @ rank n 0.0 , execution time : 0.125s
One-class SVM (OCSVM) ROC : 0.6585,precision @ rank n 0.25 , execution time : 0.0s
Principal Component Analysis (PCA) ROC : 0.686,precision @ rank n 0.25 , execution time : 0.0156s

... Processing



 ionosphere.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.9181,precision @ rank n 0.8431 , execution time : 0.1719s
Cluster-based Local Outlier Fraction ROC : 0.9176,precision @ rank n 0.8039 , execution time : 0.0938s
Histogram-base Outlier Detection (HBOS) ROC : 0.6052,precision @ rank n 0.3922 , execution time : 0.0156s




Isolation Forest ROC : 0.8442,precision @ rank n 0.6078 , execution time : 0.3125s
K Nearest Neighbour (KNN) ROC : 0.932,precision @ rank n 0.8824 , execution time : 0.0313s
Local Outlier Factor (LOF) ROC : 0.9227,precision @ rank n 0.7843 , execution time : 0.0156s
Minimum Covariance Determinant (MCD) ROC : 0.9669,precision @ rank n 0.8627 , execution time : 0.1406s
One-class SVM (OCSVM) ROC : 0.8257,precision @ rank n 0.6863 , execution time : 0.0156s
Principal Component Analysis (PCA) ROC : 0.7941,precision @ rank n 0.5686 , execution time : 0.0312s

... Processing letter.mat ...




Angle-based Outlier Detector(ABOD) ROC : 0.8783,precision @ rank n 0.4375 , execution time : 0.8125s
Cluster-based Local Outlier Fraction ROC : 0.7783,precision @ rank n 0.1875 , execution time : 0.2344s
Histogram-base Outlier Detection (HBOS) ROC : 0.6063,precision @ rank n 0.0938 , execution time : 0.0156s




Isolation Forest ROC : 0.6214,precision @ rank n 0.0625 , execution time : 0.5s
K Nearest Neighbour (KNN) ROC : 0.8573,precision @ rank n 0.3125 , execution time : 0.2344s
Local Outlier Factor (LOF) ROC : 0.8765,precision @ rank n 0.3438 , execution time : 0.1406s
Minimum Covariance Determinant (MCD) ROC : 0.8142,precision @ rank n 0.2188 , execution time : 1.9845s
One-class SVM (OCSVM) ROC : 0.5927,precision @ rank n 0.125 , execution time : 0.125s
Principal Component Analysis (PCA) ROC : 0.5216,precision @ rank n 0.125 , execution time : 0.0156s

... Processing lympho.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.9831,precision @ rank n 0.0 , execution time : 0.0625s
Cluster-based Local Outlier Fraction ROC : 1.0,precision @ rank n 1.0 , execution time : 0.0625s
Histogram-base Outlier Detection (HBOS) ROC : 1.0,precision @ rank n 1.0 , execution time : 0.0156s




Isolation Forest ROC : 1.0,precision @ rank n 1.0 , execution time : 0.2813s
K Nearest Neighbour (KNN) ROC : 1.0,precision @ rank n 1.0 , execution time : 0.0156s
Local Outlier Factor (LOF) ROC : 1.0,precision @ rank n 1.0 , execution time : 0.0s
Minimum Covariance Determinant (MCD) ROC : 1.0,precision @ rank n 1.0 , execution time : 0.0781s
One-class SVM (OCSVM) ROC : 1.0,precision @ rank n 1.0 , execution time : 0.0s
Principal Component Analysis (PCA) ROC : 1.0,precision @ rank n 1.0 , execution time : 0.0s

... Processing mnist.mat ...




Angle-based Outlier Detector(ABOD) ROC : 0.805,precision @ rank n 0.4246 , execution time : 12.1412s
Cluster-based Local Outlier Fraction ROC : 0.8512,precision @ rank n 0.4386 , execution time : 1.9532s
Histogram-base Outlier Detection (HBOS) ROC : 0.5811,precision @ rank n 0.1123 , execution time : 0.1094s




Isolation Forest ROC : 0.8349,precision @ rank n 0.393 , execution time : 2.5157s
K Nearest Neighbour (KNN) ROC : 0.853,precision @ rank n 0.4561 , execution time : 10.0161s
Local Outlier Factor (LOF) ROC : 0.7469,precision @ rank n 0.3579 , execution time : 9.5161s




Minimum Covariance Determinant (MCD) ROC : 0.8497,precision @ rank n 0.2105 , execution time : 4.5627s
One-class SVM (OCSVM) ROC : 0.8497,precision @ rank n 0.4105 , execution time : 6.6253s
Principal Component Analysis (PCA) ROC : 0.8488,precision @ rank n 0.4035 , execution time : 0.2813s

... Processing musk.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.2161,precision @ rank n 0.1 , execution time : 4.0314s
Cluster-based Local Outlier Fraction ROC : 1.0,precision @ rank n 1.0 , execution time : 0.5625s
Histogram-base Outlier Detection (HBOS) ROC : 0.9999,precision @ rank n 0.975 , execution time : 0.1094s




Isolation Forest ROC : 1.0,precision @ rank n 1.0 , execution time : 1.5001s
K Nearest Neighbour (KNN) ROC : 0.8009,precision @ rank n 0.175 , execution time : 2.5939s
Local Outlier Factor (LOF) ROC : 0.4629,precision @ rank n 0.125 , execution time : 2.3907s
Minimum Covariance Determinant (MCD) ROC : 0.9998,precision @ rank n 0.95 , execution time : 19.4853s
One-class SVM (OCSVM) ROC : 1.0,precision @ rank n 1.0 , execution time : 1.6251s
Principal Component Analysis (PCA) ROC : 1.0,precision @ rank n 1.0 , execution time : 0.2031s

... Processing optdigits.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.4894,precision @ rank n 0.0152 , execution time : 4.9534s
Cluster-based Local Outlier Fraction ROC : 0.7901,precision @ rank n 0.0 , execution time : 0.7657s
Histogram-base Outlier Detection (HBOS) ROC : 0.8774,precision @ rank n 0.2121 , execution time : 0.0469s




Isolation Forest ROC : 0.6802,precision @ rank n 0.0303 , execution time : 1.1407s
K Nearest Neighbour (KNN) ROC : 0.406,precision @ rank n 0.0 , execution time : 2.7501s
Local Outlier Factor (LOF) ROC : 0.5277,precision @ rank n 0.0303 , execution time : 2.5157s




Minimum Covariance Determinant (MCD) ROC : 0.4453,precision @ rank n 0.0 , execution time : 2.2025s
One-class SVM (OCSVM) ROC : 0.5171,precision @ rank n 0.0 , execution time : 2.1564s
Principal Component Analysis (PCA) ROC : 0.526,precision @ rank n 0.0 , execution time : 0.125s

... Processing pendigits.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.667,precision @ rank n 0.0526 , execution time : 3.7971s
Cluster-based Local Outlier Fraction ROC : 0.8082,precision @ rank n 0.1579 , execution time : 0.4063s
Histogram-base Outlier Detection (HBOS) ROC : 0.9348,precision @ rank n 0.2632 , execution time : 0.0156s




Isolation Forest ROC : 0.9394,precision @ rank n 0.3509 , execution time : 0.9375s
K Nearest Neighbour (KNN) ROC : 0.7371,precision @ rank n 0.0702 , execution time : 1.0469s
Local Outlier Factor (LOF) ROC : 0.4965,precision @ rank n 0.0702 , execution time : 1.0157s
Minimum Covariance Determinant (MCD) ROC : 0.8204,precision @ rank n 0.0877 , execution time : 3.3283s
One-class SVM (OCSVM) ROC : 0.9235,precision @ rank n 0.3158 , execution time : 1.7188s
Principal Component Analysis (PCA) ROC : 0.9309,precision @ rank n 0.3158 , execution time : 0.0156s

... Processing pima.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.7163,precision @ rank n 0.5253 , execution time : 0.3281s
Cluster-based Local Outlier Fraction ROC : 0.67,precision @ rank n 0.4949 , execution time : 0.1406s
Histogram-base Outlier Detection (HBOS) ROC : 0.711,precision @ rank n 0.5354 , execution time : 0.0156s




Isolation Forest ROC : 0.6845,precision @ rank n 0.5152 , execution time : 0.3594s
K Nearest Neighbour (KNN) ROC : 0.7395,precision @ rank n 0.5859 , execution time : 0.0469s
Local Outlier Factor (LOF) ROC : 0.6574,precision @ rank n 0.4646 , execution time : 0.0156s
Minimum Covariance Determinant (MCD) ROC : 0.7175,precision @ rank n 0.5152 , execution time : 0.0938s
One-class SVM (OCSVM) ROC : 0.6561,precision @ rank n 0.5051 , execution time : 0.0313s
Principal Component Analysis (PCA) ROC : 0.6762,precision @ rank n 0.5354 , execution time : 0.0s

... Processing satellite.mat ...




Angle-based Outlier Detector(ABOD) ROC : 0.5653,precision @ rank n 0.3962 , execution time : 3.9064s
Cluster-based Local Outlier Fraction ROC : 0.7241,precision @ rank n 0.5412 , execution time : 0.8594s
Histogram-base Outlier Detection (HBOS) ROC : 0.7486,precision @ rank n 0.57 , execution time : 0.0469s




Isolation Forest ROC : 0.6854,precision @ rank n 0.5775 , execution time : 1.1563s
K Nearest Neighbour (KNN) ROC : 0.6853,precision @ rank n 0.4988 , execution time : 1.7501s
Local Outlier Factor (LOF) ROC : 0.572,precision @ rank n 0.395 , execution time : 1.6407s
Minimum Covariance Determinant (MCD) ROC : 0.8055,precision @ rank n 0.6762 , execution time : 4.0471s
One-class SVM (OCSVM) ROC : 0.6478,precision @ rank n 0.5225 , execution time : 2.2189s
Principal Component Analysis (PCA) ROC : 0.5923,precision @ rank n 0.465 , execution time : 0.0313s

... Processing satimage-2.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.8432,precision @ rank n 0.2333 , execution time : 3.4064s
Cluster-based Local Outlier Fraction ROC : 0.9998,precision @ rank n 0.9333 , execution time : 0.5156s
Histogram-base Outlier Detection (HBOS) ROC : 0.9784,precision @ rank n 0.6 , execution time : 0.0313s




Isolation Forest ROC : 0.9952,precision @ rank n 0.8667 , execution time : 1.1876s
K Nearest Neighbour (KNN) ROC : 0.9515,precision @ rank n 0.4333 , execution time : 1.4532s
Local Outlier Factor (LOF) ROC : 0.5257,precision @ rank n 0.1667 , execution time : 1.2501s
Minimum Covariance Determinant (MCD) ROC : 0.9964,precision @ rank n 0.6667 , execution time : 3.5939s
One-class SVM (OCSVM) ROC : 0.9997,precision @ rank n 0.9 , execution time : 1.8126s
Principal Component Analysis (PCA) ROC : 0.9816,precision @ rank n 0.7333 , execution time : 0.0313s

... Processing shuttle.mat ...




Angle-based Outlier Detector(ABOD) ROC : 0.6171,precision @ rank n 0.2003 , execution time : 32.5641s
Cluster-based Local Outlier Fraction ROC : 0.6273,precision @ rank n 0.2025 , execution time : 1.3751s
Histogram-base Outlier Detection (HBOS) ROC : 0.9871,precision @ rank n 0.9985 , execution time : 0.0313s




Isolation Forest ROC : 0.9977,precision @ rank n 0.9618 , execution time : 4.6096s
K Nearest Neighbour (KNN) ROC : 0.6507,precision @ rank n 0.212 , execution time : 15.282s
Local Outlier Factor (LOF) ROC : 0.5556,precision @ rank n 0.1548 , execution time : 20.8448s






Minimum Covariance Determinant (MCD) ROC : 0.9899,precision @ rank n 0.7395 , execution time : 18.6728s
One-class SVM (OCSVM) ROC : 0.9934,precision @ rank n 0.956 , execution time : 95.4733s
Principal Component Analysis (PCA) ROC : 0.9915,precision @ rank n 0.9516 , execution time : 0.0625s

... Processing vertebral.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.5366,precision @ rank n 0.2143 , execution time : 0.1094s
Cluster-based Local Outlier Fraction ROC : 0.439,precision @ rank n 0.0714 , execution time : 0.0781s
Histogram-base Outlier Detection (HBOS) ROC : 0.3506,precision @ rank n 0.0 , execution time : 0.0156s




Isolation Forest ROC : 0.3894,precision @ rank n 0.0 , execution time : 0.2813s
K Nearest Neighbour (KNN) ROC : 0.4573,precision @ rank n 0.0714 , execution time : 0.0156s
Local Outlier Factor (LOF) ROC : 0.4983,precision @ rank n 0.1429 , execution time : 0.0s
Minimum Covariance Determinant (MCD) ROC : 0.4338,precision @ rank n 0.0714 , execution time : 0.0781s
One-class SVM (OCSVM) ROC : 0.4686,precision @ rank n 0.0714 , execution time : 0.0156s
Principal Component Analysis (PCA) ROC : 0.4085,precision @ rank n 0.0 , execution time : 0.0s

... Processing vowels.mat ...




Angle-based Outlier Detector(ABOD) ROC : 0.9616,precision @ rank n 0.6316 , execution time : 0.625s
Cluster-based Local Outlier Fraction ROC : 0.8963,precision @ rank n 0.3158 , execution time : 0.1406s
Histogram-base Outlier Detection (HBOS) ROC : 0.6876,precision @ rank n 0.1579 , execution time : 0.0s




Isolation Forest ROC : 0.8183,precision @ rank n 0.1579 , execution time : 0.4531s
K Nearest Neighbour (KNN) ROC : 0.9734,precision @ rank n 0.4737 , execution time : 0.1406s
Local Outlier Factor (LOF) ROC : 0.9398,precision @ rank n 0.3684 , execution time : 0.0625s
Minimum Covariance Determinant (MCD) ROC : 0.7243,precision @ rank n 0.1053 , execution time : 1.3126s
One-class SVM (OCSVM) ROC : 0.8163,precision @ rank n 0.2632 , execution time : 0.0625s
Principal Component Analysis (PCA) ROC : 0.6297,precision @ rank n 0.1579 , execution time : 0.0s

... Processing wbc.mat ...
Angle-based Outlier Detector(ABOD) ROC : 0.921,precision @ rank n 0.375 , execution time : 0.1563s
Cluster-based Local Outlier Fraction ROC : 0.9149,precision @ rank n 0.375 , execution time : 0.1094s
Histogram-base Outlier Detection (HBOS) ROC : 0.9479,precision @ rank n 0.5 , execution time : 0.0156s




Isolation Forest ROC : 0.9314,precision @ rank n 0.5 , execution time : 0.3125s
K Nearest Neighbour (KNN) ROC : 0.9444,precision @ rank n 0.5 , execution time : 0.0313s
Local Outlier Factor (LOF) ROC : 0.9227,precision @ rank n 0.375 , execution time : 0.0156s
Minimum Covariance Determinant (MCD) ROC : 0.9288,precision @ rank n 0.5 , execution time : 0.1094s
One-class SVM (OCSVM) ROC : 0.9358,precision @ rank n 0.375 , execution time : 0.0156s
Principal Component Analysis (PCA) ROC : 0.9262,precision @ rank n 0.375 , execution time : 0.0s


In [12]:
time_df

Unnamed: 0,Data,#Samples,#Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCDOCSVM,PCA
0,arrhythmia,452,274,14.6018,3.547,3.1095,2.6876,0.7188,0.1719,0.1094,1.5313,0.0781,0.0938
0,cardio,1831,21,9.6122,0.9063,0.2813,0.0156,0.5156,0.2813,0.2344,1.1876,0.1563,0.1563
0,glass,214,9,4.2056,0.0938,0.0781,0.0,0.3438,0.0156,0.0156,0.125,0.0,0.0156
0,ionosphere,351,33,35.8974,0.1719,0.0938,0.0156,0.3125,0.0313,0.0156,0.1406,0.0156,0.0312
0,letter,1600,32,6.25,0.8125,0.2344,0.0156,0.5,0.2344,0.1406,1.9845,0.125,0.0156
0,lympho,148,18,4.0541,0.0625,0.0625,0.0156,0.2813,0.0156,0.0,0.0781,0.0,0.0
0,mnist,7603,100,9.2069,12.1412,1.9532,0.1094,2.5157,10.0161,9.5161,4.5627,6.6253,0.2813
0,musk,3062,166,3.1679,4.0314,0.5625,0.1094,1.5001,2.5939,2.3907,19.4853,1.6251,0.2031
0,optdigits,5216,64,2.8758,4.9534,0.7657,0.0469,1.1407,2.7501,2.5157,2.2025,2.1564,0.125
0,pendigits,6870,16,2.2707,3.7971,0.4063,0.0156,0.9375,1.0469,1.0157,3.3283,1.7188,0.0156


In [13]:
roc_df

Unnamed: 0,Data,#Samples,#Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCDOCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7687,0.7789,0.8511,0.8625,0.782,0.7787,0.8228,0.7986,0.8
0,cardio,1831,21,9.6122,0.5615,0.8414,0.8394,0.9281,0.7296,0.5641,0.8295,0.9341,0.9512
0,glass,214,9,4.2056,0.6951,0.811,0.7073,0.7165,0.8384,0.7043,0.8293,0.6585,0.686
0,ionosphere,351,33,35.8974,0.9181,0.9176,0.6052,0.8442,0.932,0.9227,0.9669,0.8257,0.7941
0,letter,1600,32,6.25,0.8783,0.7783,0.6063,0.6214,0.8573,0.8765,0.8142,0.5927,0.5216
0,lympho,148,18,4.0541,0.9831,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.805,0.8512,0.5811,0.8349,0.853,0.7469,0.8497,0.8497,0.8488
0,musk,3062,166,3.1679,0.2161,1.0,0.9999,1.0,0.8009,0.4629,0.9998,1.0,1.0
0,optdigits,5216,64,2.8758,0.4894,0.7901,0.8774,0.6802,0.406,0.5277,0.4453,0.5171,0.526
0,pendigits,6870,16,2.2707,0.667,0.8082,0.9348,0.9394,0.7371,0.4965,0.8204,0.9235,0.9309


In [14]:
prn_df

Unnamed: 0,Data,#Samples,#Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCDOCSVM,PCA
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.5714,0.6071,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1791,0.5522,0.4925,0.4925,0.3582,0.1791,0.4179,0.5075,0.5821
0,glass,214,9,4.2056,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8431,0.8039,0.3922,0.6078,0.8824,0.7843,0.8627,0.6863,0.5686
0,letter,1600,32,6.25,0.4375,0.1875,0.0938,0.0625,0.3125,0.3438,0.2188,0.125,0.125
0,lympho,148,18,4.0541,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.4246,0.4386,0.1123,0.393,0.4561,0.3579,0.2105,0.4105,0.4035
0,musk,3062,166,3.1679,0.1,1.0,0.975,1.0,0.175,0.125,0.95,1.0,1.0
0,optdigits,5216,64,2.8758,0.0152,0.0,0.2121,0.0303,0.0,0.0303,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0526,0.1579,0.2632,0.3509,0.0702,0.0702,0.0877,0.3158,0.3158
