# Python Package 

In [1]:
import os
import sys
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from scipy.io import loadmat

# Import Pycod Package 

In [16]:

from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

# Import Metrics Package

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

# Define data file and read x and y 

In [4]:
mat_file_list=['arrhythmia.mat','cardio.mat','glass.mat',
               'ionosphere.mat','letter.mat','lympho.mat',
               'mnist.mat','musk.mat','optdigits.mat',
               'pendigits.mat','pima.mat','satellite.mat','satimage-2.mat',
               'shuttle.mat','shuttle.mat','vertebral.mat','vowels.mat','wbc.mat']

In [5]:
mat_file_list

['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

# Loading mat file

In [6]:
df_columns=['Data','#Sample','#Dimensions','Outlier Perc',
            'PCA','MCD','OCSVM','LOF','CBLOF','KNN','HBOS',
            'ABOD','IFOREST','FEATUREBAGGING']

In [7]:
roc_df=pd.DataFrame(columns=df_columns)
prn_df=pd.DataFrame(columns=df_columns)
time_df=pd.DataFrame(columns=df_columns)
print(roc_df,prn_df,time_df)

Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: [] Empty DataFrame
Columns: [Data, #Sample, #Dimensions, Outlier Perc, PCA, MCD, OCSVM, LOF, CBLOF, KNN, HBOS, ABOD, IFOREST, FEATUREBAGGING]
Index: []


In [8]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING


In [18]:
from time import time
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
   print("\n... Processing", mat_file, '...')
   mat = loadmat(os.path.join(mat_file))

   X = mat['X']
   y = mat['y'].ravel()
   outliers_fraction = np.count_nonzero(y) / len(y)
   outliers_percentage = round(outliers_fraction * 100, ndigits=4)

   # construct containers for saving results
   roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
   prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
   time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

   # 60% data for training and 40% for testing
   X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                       random_state=random_state)

   # standardizing data for processing
   X_train_norm, X_test_norm = standardizer(X_train, X_test)

   classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
       contamination=outliers_fraction),
       'Cluster-based Local Outlier Factor': CBLOF(
           contamination=outliers_fraction, check_estimator=False,
           random_state=random_state),
       'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                         random_state=random_state),
       'Histogram-base Outlier Detection (HBOS)': HBOS(
           contamination=outliers_fraction),
       'Isolation Forest': IForest(contamination=outliers_fraction,
                                   random_state=random_state),
       'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
       'Local Outlier Factor (LOF)': LOF(
           contamination=outliers_fraction),
       'Minimum Covariance Determinant (MCD)': MCD(
           contamination=outliers_fraction, random_state=random_state),
       'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
       'Principal Component Analysis (PCA)': PCA(
           contamination=outliers_fraction, random_state=random_state),
   }

   for clf_name, clf in classifiers.items():
       t0 = time()
       clf.fit(X_train_norm)
       test_scores = clf.decision_function(X_test_norm)
       t1 = time()
       duration = round(t1 - t0, ndigits=4)
       time_list.append(duration)

       roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
       prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

       print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
             'execution time: {duration}s'.format(
           clf_name=clf_name, roc=roc, prn=prn, duration=duration))

       roc_list.append(roc)
       prn_list.append(prn)

   temp_df = pd.DataFrame(time_list).transpose()
   temp_df.columns = df_columns
   time_df = pd.concat([time_df, temp_df], axis=0)

   temp_df = pd.DataFrame(roc_list).transpose()
   temp_df.columns = df_columns
   roc_df = pd.concat([roc_df, temp_df], axis=0)

   temp_df = pd.DataFrame(prn_list).transpose()
   temp_df.columns = df_columns
   prn_df = pd.concat([prn_df, temp_df], axis=0)



... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 3.501s
Cluster-based Local Outlier Factor ROC:0.7789, precision @ rank n:0.4643, execution time: 2.1392s
Feature Bagging ROC:0.7796, precision @ rank n:0.4643, execution time: 0.7181s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 1.9139s




Isolation Forest ROC:0.8637, precision @ rank n:0.6071, execution time: 0.5963s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.1048s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0888s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 1.2725s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0738s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.1353s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5892, precision @ rank n:0.1918, execution time: 0.4638s
Cluster-based Local Outlier Factor ROC:0.8845, precision @ rank n:0.4932, execution time: 0.1376s
Feature Bagging ROC:0.6385, precision @ rank n:0.1781, execution time: 1.0283s
Histogram-base Outlier Detection (HBOS) ROC:0.8373, precision @ rank n:0.4521, execution time: 0.008s




Isolation Forest ROC:0.951, precision @ rank n:0.6027, execution time: 0.376s
K Nearest Neighbors (KNN) ROC:0.734, precision @ rank n:0.3562, execution time: 0.1795s
Local Outlier Factor (LOF) ROC:0.588, precision @ rank n:0.1507, execution time: 0.1217s




Minimum Covariance Determinant (MCD) ROC:0.811, precision @ rank n:0.4658, execution time: 0.7422s
One-class SVM (OCSVM) ROC:0.9478, precision @ rank n:0.5342, execution time: 0.0967s
Principal Component Analysis (PCA) ROC:0.9616, precision @ rank n:0.6849, execution time: 0.0075s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6951, precision @ rank n:0.25, execution time: 0.0698s
Cluster-based Local Outlier Factor ROC:0.811, precision @ rank n:0.25, execution time: 0.0898s
Feature Bagging ROC:0.7073, precision @ rank n:0.25, execution time: 0.0559s
Histogram-base Outlier Detection (HBOS) ROC:0.7073, precision @ rank n:0.0, execution time: 0.005s




Isolation Forest ROC:0.7134, precision @ rank n:0.25, execution time: 0.3631s
K Nearest Neighbors (KNN) ROC:0.8384, precision @ rank n:0.25, execution time: 0.016s
Local Outlier Factor (LOF) ROC:0.7043, precision @ rank n:0.25, execution time: 0.005s
Minimum Covariance Determinant (MCD) ROC:0.8293, precision @ rank n:0.0, execution time: 0.0744s
One-class SVM (OCSVM) ROC:0.6585, precision @ rank n:0.25, execution time: 0.003s
Principal Component Analysis (PCA) ROC:0.686, precision @ rank n:0.25, execution time: 0.003s

... Processing ionosphere.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.9181, precision @ rank n:0.8431, execution time: 0.1995s
Cluster-based Local Outlier Factor ROC:0.9176, precision @ rank n:0.8039, execution time: 0.0748s
Feature Bagging ROC:0.9303, precision @ rank n:0.8039, execution time: 0.1127s
Histogram-base Outlier Detection (HBOS) ROC:0.6052, precision @ rank n:0.3922, execution time: 0.0189s




Isolation Forest ROC:0.8516, precision @ rank n:0.6078, execution time: 0.5306s
K Nearest Neighbors (KNN) ROC:0.932, precision @ rank n:0.8824, execution time: 0.031s
Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.7843, execution time: 0.0129s
Minimum Covariance Determinant (MCD) ROC:0.9669, precision @ rank n:0.8627, execution time: 0.1077s
One-class SVM (OCSVM) ROC:0.8257, precision @ rank n:0.6863, execution time: 0.008s
Principal Component Analysis (PCA) ROC:0.7941, precision @ rank n:0.5686, execution time: 0.0137s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8783, precision @ rank n:0.4375, execution time: 0.6643s
Cluster-based Local Outlier Factor ROC:0.7783, precision @ rank n:0.1875, execution time: 0.2673s
Feature Bagging ROC:0.8947, precision @ rank n:0.4062, execution time: 1.4162s
Histogram-base Outlier Detection (HBOS) ROC:0.6063, precision @ rank n:0.0938, execution time: 0.0279s




Isolation Forest ROC:0.6279, precision @ rank n:0.0625, execution time: 0.5875s
K Nearest Neighbors (KNN) ROC:0.8573, precision @ rank n:0.3125, execution time: 0.2493s
Local Outlier Factor (LOF) ROC:0.8765, precision @ rank n:0.3438, execution time: 0.1695s
Minimum Covariance Determinant (MCD) ROC:0.8061, precision @ rank n:0.1875, execution time: 1.9249s
One-class SVM (OCSVM) ROC:0.5927, precision @ rank n:0.125, execution time: 0.1626s
Principal Component Analysis (PCA) ROC:0.5216, precision @ rank n:0.125, execution time: 0.014s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9831, precision @ rank n:0.0, execution time: 0.0638s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.0698s
Feature Bagging ROC:1.0, precision @ rank n:1.0, execution time: 0.0529s
Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.008s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 0.4s
K Nearest Neighbors (KNN) ROC:1.0, precision @ rank n:1.0, execution time: 0.012s
Local Outlier Factor (LOF) ROC:1.0, precision @ rank n:1.0, execution time: 0.004s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 0.0648s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.002s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.003s

... Processing mnist.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.7628, precision @ rank n:0.3367, execution time: 12.9863s
Cluster-based Local Outlier Factor ROC:0.8389, precision @ rank n:0.3912, execution time: 2.0146s
Feature Bagging ROC:0.7157, precision @ rank n:0.3741, execution time: 88.7717s
Histogram-base Outlier Detection (HBOS) ROC:0.5766, precision @ rank n:0.1361, execution time: 0.1157s




Isolation Forest ROC:0.7915, precision @ rank n:0.2687, execution time: 5.0027s
K Nearest Neighbors (KNN) ROC:0.8498, precision @ rank n:0.432, execution time: 12.3602s
Local Outlier Factor (LOF) ROC:0.7195, precision @ rank n:0.3673, execution time: 12.2502s




Minimum Covariance Determinant (MCD) ROC:0.8713, precision @ rank n:0.2653, execution time: 5.1183s
One-class SVM (OCSVM) ROC:0.854, precision @ rank n:0.3946, execution time: 8.9112s
Principal Component Analysis (PCA) ROC:0.8534, precision @ rank n:0.3878, execution time: 0.2882s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2161, precision @ rank n:0.1, execution time: 3.9844s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.6941s
Feature Bagging ROC:0.473, precision @ rank n:0.125, execution time: 24.063s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.975, execution time: 0.1057s




Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 2.2859s
K Nearest Neighbors (KNN) ROC:0.8009, precision @ rank n:0.175, execution time: 3.377s
Local Outlier Factor (LOF) ROC:0.4629, precision @ rank n:0.125, execution time: 3.3291s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 19.8684s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 2.4903s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.2822s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4894, precision @ rank n:0.0152, execution time: 4.6606s
Cluster-based Local Outlier Factor ROC:0.7901, precision @ rank n:0.0, execution time: 0.9823s
Feature Bagging ROC:0.5062, precision @ rank n:0.0303, execution time: 23.5995s
Histogram-base Outlier Detection (HBOS) ROC:0.8774, precision @ rank n:0.2121, execution time: 0.1037s




Isolation Forest ROC:0.686, precision @ rank n:0.0303, execution time: 1.8161s
K Nearest Neighbors (KNN) ROC:0.406, precision @ rank n:0.0, execution time: 3.376s
Local Outlier Factor (LOF) ROC:0.5277, precision @ rank n:0.0303, execution time: 3.0678s




Minimum Covariance Determinant (MCD) ROC:0.3822, precision @ rank n:0.0, execution time: 2.2789s
One-class SVM (OCSVM) ROC:0.5171, precision @ rank n:0.0, execution time: 2.6819s
Principal Component Analysis (PCA) ROC:0.526, precision @ rank n:0.0, execution time: 0.0997s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.667, precision @ rank n:0.0526, execution time: 2.5801s
Cluster-based Local Outlier Factor ROC:0.8082, precision @ rank n:0.1579, execution time: 0.4687s
Feature Bagging ROC:0.4889, precision @ rank n:0.0526, execution time: 7.7991s
Histogram-base Outlier Detection (HBOS) ROC:0.9348, precision @ rank n:0.2632, execution time: 0.018s




Isolation Forest ROC:0.939, precision @ rank n:0.3333, execution time: 1.1489s
K Nearest Neighbors (KNN) ROC:0.7371, precision @ rank n:0.0702, execution time: 1.153s
Local Outlier Factor (LOF) ROC:0.4965, precision @ rank n:0.0702, execution time: 1.094s
Minimum Covariance Determinant (MCD) ROC:0.8204, precision @ rank n:0.0877, execution time: 3.7081s
One-class SVM (OCSVM) ROC:0.9235, precision @ rank n:0.3158, execution time: 1.7424s
Principal Component Analysis (PCA) ROC:0.9309, precision @ rank n:0.3158, execution time: 0.014s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7163, precision @ rank n:0.5253, execution time: 0.2554s
Cluster-based Local Outlier Factor ROC:0.67, precision @ rank n:0.4949, execution time: 0.1526s
Feature Bagging ROC:0.6448, precision @ rank n:0.4444, execution time: 0.1695s
Histogram-base Outlier Detection (HBOS) ROC:0.711, precision @ rank n:0.5354, execution time: 0.005s




Isolation Forest ROC:0.6829, precision @ rank n:0.5253, execution time: 0.4388s
K Nearest Neighbors (KNN) ROC:0.7395, precision @ rank n:0.5859, execution time: 0.0698s
Local Outlier Factor (LOF) ROC:0.6574, precision @ rank n:0.4646, execution time: 0.0209s
Minimum Covariance Determinant (MCD) ROC:0.7175, precision @ rank n:0.5152, execution time: 0.1167s
One-class SVM (OCSVM) ROC:0.6561, precision @ rank n:0.5051, execution time: 0.0209s
Principal Component Analysis (PCA) ROC:0.6762, precision @ rank n:0.5354, execution time: 0.003s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5653, precision @ rank n:0.3962, execution time: 3.2344s
Cluster-based Local Outlier Factor ROC:0.7241, precision @ rank n:0.5412, execution time: 0.7889s
Feature Bagging ROC:0.572, precision @ rank n:0.4, execution time: 14.5112s
Histogram-base Outlier Detection (HBOS) ROC:0.7486, precision @ rank n:0.57, execution time: 0.0369s




Isolation Forest ROC:0.6838, precision @ rank n:0.5812, execution time: 1.3454s
K Nearest Neighbors (KNN) ROC:0.6853, precision @ rank n:0.4988, execution time: 1.9198s
Local Outlier Factor (LOF) ROC:0.572, precision @ rank n:0.395, execution time: 1.8281s
Minimum Covariance Determinant (MCD) ROC:0.8055, precision @ rank n:0.6762, execution time: 4.2177s
One-class SVM (OCSVM) ROC:0.6478, precision @ rank n:0.5225, execution time: 2.6469s
Principal Component Analysis (PCA) ROC:0.5923, precision @ rank n:0.465, execution time: 0.0459s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8432, precision @ rank n:0.2333, execution time: 2.9262s
Cluster-based Local Outlier Factor ROC:0.9998, precision @ rank n:0.9333, execution time: 0.6034s
Feature Bagging ROC:0.5235, precision @ rank n:0.1667, execution time: 11.6255s
Histogram-base Outlier Detection (HBOS) ROC:0.9784, precision @ rank n:0.6, execution time: 0.0309s




Isolation Forest ROC:0.9955, precision @ rank n:0.8667, execution time: 1.1968s
K Nearest Neighbors (KNN) ROC:0.9515, precision @ rank n:0.4333, execution time: 1.6925s
Local Outlier Factor (LOF) ROC:0.5257, precision @ rank n:0.1667, execution time: 1.5349s
Minimum Covariance Determinant (MCD) ROC:0.9963, precision @ rank n:0.6667, execution time: 3.746s
One-class SVM (OCSVM) ROC:0.9997, precision @ rank n:0.9, execution time: 2.1562s
Principal Component Analysis (PCA) ROC:0.9816, precision @ rank n:0.7333, execution time: 0.0359s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6171, precision @ rank n:0.2003, execution time: 27.3977s
Cluster-based Local Outlier Factor ROC:0.6273, precision @ rank n:0.2025, execution time: 1.8979s
Feature Bagging ROC:0.4725, precision @ rank n:0.0257, execution time: 135.0771s
Histogram-base Outlier Detection (HBOS) ROC:0.9871, precision @ rank n:0.9985, execution time: 0.0339s




Isolation Forest ROC:0.9976, precision @ rank n:0.9501, execution time: 6.0599s
K Nearest Neighbors (KNN) ROC:0.6507, precision @ rank n:0.212, execution time: 15.1485s
Local Outlier Factor (LOF) ROC:0.5556, precision @ rank n:0.1548, execution time: 20.1641s






Minimum Covariance Determinant (MCD) ROC:0.9899, precision @ rank n:0.7395, execution time: 20.3215s
One-class SVM (OCSVM) ROC:0.9934, precision @ rank n:0.956, execution time: 84.6538s
Principal Component Analysis (PCA) ROC:0.9915, precision @ rank n:0.9516, execution time: 0.0758s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6091, precision @ rank n:0.1857, execution time: 25.2574s
Cluster-based Local Outlier Factor ROC:0.6459, precision @ rank n:0.2221, execution time: 1.7632s
Feature Bagging ROC:0.5379, precision @ rank n:0.1107, execution time: 82.0441s
Histogram-base Outlier Detection (HBOS) ROC:0.9869, precision @ rank n:0.9304, execution time: 0.0359s




Isolation Forest ROC:0.9964, precision @ rank n:0.955, execution time: 6.2782s
K Nearest Neighbors (KNN) ROC:0.6482, precision @ rank n:0.2073, execution time: 14.9779s
Local Outlier Factor (LOF) ROC:0.536, precision @ rank n:0.145, execution time: 19.8429s






Minimum Covariance Determinant (MCD) ROC:0.9896, precision @ rank n:0.7471, execution time: 18.1933s
One-class SVM (OCSVM) ROC:0.991, precision @ rank n:0.9536, execution time: 83.7268s
Principal Component Analysis (PCA) ROC:0.9888, precision @ rank n:0.9486, execution time: 0.0608s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.3056, precision @ rank n:0.0833, execution time: 0.0768s
Cluster-based Local Outlier Factor ROC:0.3671, precision @ rank n:0.0833, execution time: 0.0788s
Feature Bagging ROC:0.378, precision @ rank n:0.0833, execution time: 0.0549s
Histogram-base Outlier Detection (HBOS) ROC:0.2932, precision @ rank n:0.0, execution time: 0.0037s




Isolation Forest ROC:0.3056, precision @ rank n:0.0833, execution time: 0.3641s
K Nearest Neighbors (KNN) ROC:0.3304, precision @ rank n:0.0, execution time: 0.017s
Local Outlier Factor (LOF) ROC:0.3591, precision @ rank n:0.0833, execution time: 0.006s
Minimum Covariance Determinant (MCD) ROC:0.3343, precision @ rank n:0.0, execution time: 0.2336s
One-class SVM (OCSVM) ROC:0.3829, precision @ rank n:0.0833, execution time: 0.004s
Principal Component Analysis (PCA) ROC:0.3284, precision @ rank n:0.0, execution time: 0.005s

... Processing vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9639, precision @ rank n:0.5714, execution time: 0.4828s
Cluster-based Local Outlier Factor ROC:0.9076, precision @ rank n:0.1429, execution time: 0.1426s
Feature Bagging ROC:0.9392, precision @ rank n:0.3333, execution time: 0.5475s
Histogram-base Outlier Detection (HBOS) ROC:0.6965, precision @ rank n:0.0476, execution time: 0.008s




Isolation Forest ROC:0.7605, precision @ rank n:0.0952, execution time: 0.5106s
K Nearest Neighbors (KNN) ROC:0.9748, precision @ rank n:0.4286, execution time: 0.1307s
Local Outlier Factor (LOF) ROC:0.9389, precision @ rank n:0.3333, execution time: 0.0618s
Minimum Covariance Determinant (MCD) ROC:0.8892, precision @ rank n:0.2857, execution time: 1.4062s
One-class SVM (OCSVM) ROC:0.795, precision @ rank n:0.1905, execution time: 0.0638s
Principal Component Analysis (PCA) ROC:0.6296, precision @ rank n:0.0952, execution time: 0.004s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9271, precision @ rank n:0.125, execution time: 0.1407s
Cluster-based Local Outlier Factor ROC:0.9497, precision @ rank n:0.375, execution time: 0.1097s
Feature Bagging ROC:0.9635, precision @ rank n:0.5, execution time: 0.1336s
Histogram-base Outlier Detection (HBOS) ROC:0.974, precision @ rank n:0.625, execution time: 0.0159s




Isolation Forest ROC:0.9627, precision @ rank n:0.5, execution time: 0.401s
K Nearest Neighbors (KNN) ROC:0.9514, precision @ rank n:0.375, execution time: 0.0319s
Local Outlier Factor (LOF) ROC:0.9549, precision @ rank n:0.375, execution time: 0.014s
Minimum Covariance Determinant (MCD) ROC:0.9071, precision @ rank n:0.375, execution time: 0.1107s
One-class SVM (OCSVM) ROC:0.9531, precision @ rank n:0.375, execution time: 0.009s
Principal Component Analysis (PCA) ROC:0.9505, precision @ rank n:0.375, execution time: 0.004s


In [19]:
roc_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,0.7687,0.7789,0.7796,0.8511,0.8637,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5892,0.8845,0.6385,0.8373,0.951,0.734,0.588,0.811,0.9478,0.9616
0,glass,214,9,4.2056,0.6951,0.811,0.7073,0.7073,0.7134,0.8384,0.7043,0.8293,0.6585,0.686
0,ionosphere,351,33,35.8974,0.9181,0.9176,0.9303,0.6052,0.8516,0.932,0.9227,0.9669,0.8257,0.7941
0,letter,1600,32,6.25,0.8783,0.7783,0.8947,0.6063,0.6279,0.8573,0.8765,0.8061,0.5927,0.5216
0,lympho,148,18,4.0541,0.9831,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.7628,0.8389,0.7157,0.5766,0.7915,0.8498,0.7195,0.8713,0.854,0.8534
0,musk,3062,166,3.1679,0.2161,1.0,0.473,0.9999,1.0,0.8009,0.4629,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.4894,0.7901,0.5062,0.8774,0.686,0.406,0.5277,0.3822,0.5171,0.526
0,pendigits,6870,16,2.2707,0.667,0.8082,0.4889,0.9348,0.939,0.7371,0.4965,0.8204,0.9235,0.9309


In [20]:
roc_list

['wbc',
 378,
 30,
 5.5556,
 0.9271,
 0.9497,
 0.9635,
 0.974,
 0.9627,
 0.9514,
 0.9549,
 0.9071,
 0.9531,
 0.9505]

In [21]:
mat_file[:-4]

'wbc'

In [22]:
 X.shape[0]

378

In [23]:
 X.shape[1]

30

In [24]:
X.shape

(378, 30)

In [25]:
mat_file[:]

'wbc.mat'

In [26]:
outliers_percentage

5.5556

In [31]:
temp_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,wbc,378,30,5.5556,0.125,0.375,0.5,0.625,0.5,0.375,0.375,0.375,0.375,0.375


In [28]:
prn_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.4643,0.5714,0.6071,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1918,0.4932,0.1781,0.4521,0.6027,0.3562,0.1507,0.4658,0.5342,0.6849
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8431,0.8039,0.8039,0.3922,0.6078,0.8824,0.7843,0.8627,0.6863,0.5686
0,letter,1600,32,6.25,0.4375,0.1875,0.4062,0.0938,0.0625,0.3125,0.3438,0.1875,0.125,0.125
0,lympho,148,18,4.0541,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.3367,0.3912,0.3741,0.1361,0.2687,0.432,0.3673,0.2653,0.3946,0.3878
0,musk,3062,166,3.1679,0.1,1.0,0.125,0.975,1.0,0.175,0.125,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.0152,0.0,0.0303,0.2121,0.0303,0.0,0.0303,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0526,0.1579,0.0526,0.2632,0.3333,0.0702,0.0702,0.0877,0.3158,0.3158


In [32]:
roc

0.9505

In [33]:
prn

0.375

In [34]:
time_df

Unnamed: 0,Data,#Sample,#Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,HBOS,ABOD,IFOREST,FEATUREBAGGING
0,arrhythmia,452,274,14.6018,3.501,2.1392,0.7181,1.9139,0.5963,0.1048,0.0888,1.2725,0.0738,0.1353
0,cardio,1831,21,9.6122,0.4638,0.1376,1.0283,0.008,0.376,0.1795,0.1217,0.7422,0.0967,0.0075
0,glass,214,9,4.2056,0.0698,0.0898,0.0559,0.005,0.3631,0.016,0.005,0.0744,0.003,0.003
0,ionosphere,351,33,35.8974,0.1995,0.0748,0.1127,0.0189,0.5306,0.031,0.0129,0.1077,0.008,0.0137
0,letter,1600,32,6.25,0.6643,0.2673,1.4162,0.0279,0.5875,0.2493,0.1695,1.9249,0.1626,0.014
0,lympho,148,18,4.0541,0.0638,0.0698,0.0529,0.008,0.4,0.012,0.004,0.0648,0.002,0.003
0,mnist,7603,100,9.2069,12.9863,2.0146,88.7717,0.1157,5.0027,12.3602,12.2502,5.1183,8.9112,0.2882
0,musk,3062,166,3.1679,3.9844,0.6941,24.063,0.1057,2.2859,3.377,3.3291,19.8684,2.4903,0.2822
0,optdigits,5216,64,2.8758,4.6606,0.9823,23.5995,0.1037,1.8161,3.376,3.0678,2.2789,2.6819,0.0997
0,pendigits,6870,16,2.2707,2.5801,0.4687,7.7991,0.018,1.1489,1.153,1.094,3.7081,1.7424,0.014


In [None]:
roc_df=pd.DataFrame(columns=df_columns)
prn_df=pd.DataFrame(columns=df_columns)
time_df=pd.DataFrame(columns=df_columns)

In [None]:
from time import time
random_state = np.random.RandomState(42)


print("\n... Processing", "cardio", '...')
mat = loadmat(os.path.join('cardio.mat'))

X = mat['X']
y = mat['y'].ravel()
outliers_fraction = np.count_nonzero(y) / len(y)
outliers_percentage = round(outliers_fraction * 100, ndigits=4)

   # construct containers for saving results
roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

   # 60% data for training and 40% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                       random_state=random_state)

   # standardizing data for processing
X_train_norm, X_test_norm = standardizer(X_train, X_test)

classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
       contamination=outliers_fraction),
       'Cluster-based Local Outlier Factor': CBLOF(
           contamination=outliers_fraction, check_estimator=False,
           random_state=random_state),
       'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                         random_state=random_state),
       'Histogram-base Outlier Detection (HBOS)': HBOS(
           contamination=outliers_fraction),
       'Isolation Forest': IForest(contamination=outliers_fraction,
                                   random_state=random_state),
       'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
       'Local Outlier Factor (LOF)': LOF(
           contamination=outliers_fraction),
       'Minimum Covariance Determinant (MCD)': MCD(
           contamination=outliers_fraction, random_state=random_state),
       'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
       'Principal Component Analysis (PCA)': PCA(
           contamination=outliers_fraction, random_state=random_state),
   }

   for clf_name, clf in classifiers.items():
       t0 = time()
       clf.fit(X_train_norm)
       test_scores = clf.decision_function(X_test_norm)
       t1 = time()
       duration = round(t1 - t0, ndigits=4)
       time_list.append(duration)

       roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
       prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

       print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
             'execution time: {duration}s'.format(
           clf_name=clf_name, roc=roc, prn=prn, duration=duration))

       roc_list.append(roc)
       prn_list.append(prn)

temp_df = pd.DataFrame(time_list).transpose()
temp_df.columns = df_columns
time_df = pd.concat([time_df, temp_df], axis=0)

temp_df = pd.DataFrame(roc_list).transpose()
temp_df.columns = df_columns
roc_df = pd.concat([roc_df, temp_df], axis=0)

temp_df = pd.DataFrame(prn_list).transpose()
temp_df.columns = df_columns
prn_df = pd.concat([prn_df, temp_df], axis=0)
