In [1]:
import os
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat 

Import Pyod Packages and Methods

In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging




Import Metrics Packages

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

Define DataFile and read X & y

In [4]:
mat_file_list=['arrhythmia.mat','cardio.mat','glass.mat','ionosphere.mat','letter.mat','lympho.mat','mnist.mat','musk.mat', 'optdigits.mat', 'pendigits.mat', 'pima.mat', 
                 'satellite.mat', 'satimage-2.mat', 'shuttle.mat', 'vertebral.mat', 'vowels.mat', 'wbc.mat' ]     
mat_file_list

['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

In [5]:
data=loadmat(r'C:\Users\sathv\LU\datasets\Anamoly_detec_data\cardio.mat')

In [6]:
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [7]:
len(data)

5

In [8]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [9]:
data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

Input(Independent) Feature Shape in Mat File Format

In [10]:
type(data['X']),data['X'].shape

(numpy.ndarray, (1831, 21))

Dependent/Target/Output Feature shape

In [11]:
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [12]:
df_columns = ['Data','#Samples','# Dimensions','Outlier Perc','ABOD','CBLOF','FB','HBOS','IForest','KNN','LOF','MCD','OCSVM','PCA']

Roc DataFrame

In [13]:
roc_df=pd.DataFrame(columns=df_columns)
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


Precision DataFrame

In [14]:
prn_df=pd.DataFrame(columns=df_columns)
prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


Time DataFrame

In [15]:
time_df=pd.DataFrame(columns=df_columns)
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


In [16]:
matfiles=r"C:\Users\sathv\LU\datasets\Anamoly_detec_data"

# Exploring MAT Files

In [19]:
from time import time
random_state = np.random.RandomState(42)

for mat_file in mat_file_list:
   print("\n... Processing", mat_file, '...')
   mat = loadmat(os.path.join(r"C:\Users\sathv\LU\datasets\Anamoly_detec_data", mat_file))

   X = mat['X']
   y = mat['y'].ravel()
   outliers_fraction = np.count_nonzero(y) / len(y)
   outliers_percentage = round(outliers_fraction * 100, ndigits=4)

   # construct containers for saving results
   roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
   prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
   time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

   # 60% data for training and 40% for testing
   X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                       random_state=random_state)

   # standardizing data for processing
   X_train_norm, X_test_norm = standardizer(X_train, X_test)

   classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
       contamination=outliers_fraction),
       'Cluster-based Local Outlier Factor': CBLOF(
           contamination=outliers_fraction, check_estimator=False,
           random_state=random_state),
       'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                         random_state=random_state),
       'Histogram-base Outlier Detection (HBOS)': HBOS(
           contamination=outliers_fraction),
       'Isolation Forest': IForest(contamination=outliers_fraction,
                                   random_state=random_state),
       'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
       'Local Outlier Factor (LOF)': LOF(
           contamination=outliers_fraction),
       'Minimum Covariance Determinant (MCD)': MCD(
           contamination=outliers_fraction, random_state=random_state),
       'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
       'Principal Component Analysis (PCA)': PCA(
           contamination=outliers_fraction, random_state=random_state),
   }

   for clf_name, clf in classifiers.items():
       t0 = time()
       clf.fit(X_train_norm)
       test_scores = clf.decision_function(X_test_norm)
       t1 = time()
       duration = round(t1 - t0, ndigits=4)
       time_list.append(duration)

       roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
       prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

       print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
             'execution time: {duration}s'.format(
           clf_name=clf_name, roc=roc, prn=prn, duration=duration))

       roc_list.append(roc)
       prn_list.append(prn)

   temp_df = pd.DataFrame(time_list).transpose()
   temp_df.columns = df_columns
   time_df = pd.concat([time_df, temp_df], axis=0)

   temp_df = pd.DataFrame(roc_list).transpose()
   temp_df.columns = df_columns
   roc_df = pd.concat([roc_df, temp_df], axis=0)

   temp_df = pd.DataFrame(prn_list).transpose()
   temp_df.columns = df_columns
   prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 0.1197s




Cluster-based Local Outlier Factor ROC:0.7684, precision @ rank n:0.4643, execution time: 0.0598s
Feature Bagging ROC:0.7799, precision @ rank n:0.5, execution time: 0.4857s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 0.0499s
Isolation Forest ROC:0.8478, precision @ rank n:0.5357, execution time: 0.3271s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0718s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0618s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 0.4926s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0369s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0409s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5763, precision @ rank n:0.1875, execution time: 0.341s
Cluster-based Local Outlier Factor ROC:0.8221, precision @ rank n:0.4844, execution time: 0.1032s




Feature Bagging ROC:0.4879, precision @ rank n:0.1406, execution time: 0.6712s
Histogram-base Outlier Detection (HBOS) ROC:0.8453, precision @ rank n:0.4688, execution time: 0.006s
Isolation Forest ROC:0.9316, precision @ rank n:0.4531, execution time: 0.392s
K Nearest Neighbors (KNN) ROC:0.6959, precision @ rank n:0.2812, execution time: 0.1233s
Local Outlier Factor (LOF) ROC:0.4715, precision @ rank n:0.125, execution time: 0.0846s




Minimum Covariance Determinant (MCD) ROC:0.8778, precision @ rank n:0.3906, execution time: 0.486s
One-class SVM (OCSVM) ROC:0.9507, precision @ rank n:0.5938, execution time: 0.0778s
Principal Component Analysis (PCA) ROC:0.9638, precision @ rank n:0.6875, execution time: 0.003s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7104, precision @ rank n:0.25, execution time: 0.0369s
Cluster-based Local Outlier Factor ROC:0.8506, precision @ rank n:0.25, execution time: 0.0489s




Feature Bagging ROC:0.7043, precision @ rank n:0.25, execution time: 0.0359s
Histogram-base Outlier Detection (HBOS) ROC:0.6524, precision @ rank n:0.0, execution time: 0.003s
Isolation Forest ROC:0.7195, precision @ rank n:0.25, execution time: 0.2588s
K Nearest Neighbors (KNN) ROC:0.7805, precision @ rank n:0.25, execution time: 0.008s
Local Outlier Factor (LOF) ROC:0.7774, precision @ rank n:0.25, execution time: 0.003s
Minimum Covariance Determinant (MCD) ROC:0.7165, precision @ rank n:0.0, execution time: 0.0269s
One-class SVM (OCSVM) ROC:0.6189, precision @ rank n:0.25, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.622, precision @ rank n:0.25, execution time: 0.0009s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9004, precision @ rank n:0.8214, execution time: 0.0629s
Cluster-based Local Outlier Factor ROC:0.8952, precision @ rank n:0.8036, execution time: 0.0478s




Feature Bagging ROC:0.8933, precision @ rank n:0.75, execution time: 0.0818s
Histogram-base Outlier Detection (HBOS) ROC:0.5195, precision @ rank n:0.3393, execution time: 0.011s
Isolation Forest ROC:0.8294, precision @ rank n:0.6607, execution time: 0.3002s
K Nearest Neighbors (KNN) ROC:0.9134, precision @ rank n:0.8393, execution time: 0.015s
Local Outlier Factor (LOF) ROC:0.8989, precision @ rank n:0.75, execution time: 0.006s
Minimum Covariance Determinant (MCD) ROC:0.9399, precision @ rank n:0.8571, execution time: 0.0624s
One-class SVM (OCSVM) ROC:0.8372, precision @ rank n:0.7143, execution time: 0.006s
Principal Component Analysis (PCA) ROC:0.7971, precision @ rank n:0.5893, execution time: 0.003s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8465, precision @ rank n:0.275, execution time: 0.3246s
Cluster-based Local Outlier Factor ROC:0.7423, precision @ rank n:0.175, execution time: 0.0947s




Feature Bagging ROC:0.866, precision @ rank n:0.4, execution time: 0.5993s
Histogram-base Outlier Detection (HBOS) ROC:0.5728, precision @ rank n:0.125, execution time: 0.008s
Isolation Forest ROC:0.5836, precision @ rank n:0.05, execution time: 0.3167s
K Nearest Neighbors (KNN) ROC:0.845, precision @ rank n:0.3, execution time: 0.1075s
Local Outlier Factor (LOF) ROC:0.8409, precision @ rank n:0.325, execution time: 0.0694s
Minimum Covariance Determinant (MCD) ROC:0.7499, precision @ rank n:0.075, execution time: 0.8455s
One-class SVM (OCSVM) ROC:0.5744, precision @ rank n:0.1, execution time: 0.0611s
Principal Component Analysis (PCA) ROC:0.48, precision @ rank n:0.05, execution time: 0.006s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9382, precision @ rank n:0.4, execution time: 0.0259s
Cluster-based Local Outlier Factor ROC:0.9709, precision @ rank n:0.6, execution time: 0.0319s
Feature Bagging ROC:0.9673, precision @ rank n:0.6, execution time: 0.0359s




Histogram-base Outlier Detection (HBOS) ROC:0.9964, precision @ rank n:0.8, execution time: 0.007s
Isolation Forest ROC:0.9855, precision @ rank n:0.6, execution time: 0.2569s
K Nearest Neighbors (KNN) ROC:0.9636, precision @ rank n:0.6, execution time: 0.0051s
Local Outlier Factor (LOF) ROC:0.9636, precision @ rank n:0.6, execution time: 0.002s
Minimum Covariance Determinant (MCD) ROC:0.9164, precision @ rank n:0.6, execution time: 0.028s
One-class SVM (OCSVM) ROC:0.9636, precision @ rank n:0.6, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.9818, precision @ rank n:0.8, execution time: 0.0019s

... Processing mnist.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.7813, precision @ rank n:0.3562, execution time: 5.4881s




Cluster-based Local Outlier Factor ROC:0.8447, precision @ rank n:0.4007, execution time: 0.4498s
Feature Bagging ROC:0.7259, precision @ rank n:0.3664, execution time: 39.2462s
Histogram-base Outlier Detection (HBOS) ROC:0.5675, precision @ rank n:0.1199, execution time: 0.041s
Isolation Forest ROC:0.7813, precision @ rank n:0.3116, execution time: 1.5729s
K Nearest Neighbors (KNN) ROC:0.8409, precision @ rank n:0.4144, execution time: 4.997s
Local Outlier Factor (LOF) ROC:0.7085, precision @ rank n:0.339, execution time: 4.7772s




Minimum Covariance Determinant (MCD) ROC:0.863, precision @ rank n:0.3973, execution time: 2.138s
One-class SVM (OCSVM) ROC:0.8417, precision @ rank n:0.3801, execution time: 3.496s
Principal Component Analysis (PCA) ROC:0.8396, precision @ rank n:0.3767, execution time: 0.1157s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.0809, precision @ rank n:0.0333, execution time: 1.7369s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.1398s




Feature Bagging ROC:0.5228, precision @ rank n:0.1667, execution time: 10.2535s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9667, execution time: 0.0449s
Isolation Forest ROC:0.9992, precision @ rank n:0.9, execution time: 0.9587s
K Nearest Neighbors (KNN) ROC:0.7348, precision @ rank n:0.2333, execution time: 1.3486s
Local Outlier Factor (LOF) ROC:0.5323, precision @ rank n:0.1333, execution time: 1.2889s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:0.9667, execution time: 8.9795s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.9479s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.1098s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4428, precision @ rank n:0.0161, execution time: 1.9774s




Cluster-based Local Outlier Factor ROC:0.7852, precision @ rank n:0.0, execution time: 0.2585s
Feature Bagging ROC:0.4641, precision @ rank n:0.0484, execution time: 10.7631s
Histogram-base Outlier Detection (HBOS) ROC:0.8822, precision @ rank n:0.2581, execution time: 0.023s
Isolation Forest ROC:0.5442, precision @ rank n:0.0161, execution time: 0.673s
K Nearest Neighbors (KNN) ROC:0.3824, precision @ rank n:0.0, execution time: 1.3489s
Local Outlier Factor (LOF) ROC:0.4584, precision @ rank n:0.0484, execution time: 1.2524s




Minimum Covariance Determinant (MCD) ROC:0.3486, precision @ rank n:0.0, execution time: 0.9018s
One-class SVM (OCSVM) ROC:0.4972, precision @ rank n:0.0, execution time: 1.114s
Principal Component Analysis (PCA) ROC:0.504, precision @ rank n:0.0, execution time: 0.037s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7008, precision @ rank n:0.0308, execution time: 1.1752s
Cluster-based Local Outlier Factor ROC:0.9609, precision @ rank n:0.3077, execution time: 0.1845s




Feature Bagging ROC:0.4687, precision @ rank n:0.0462, execution time: 3.609s
Histogram-base Outlier Detection (HBOS) ROC:0.9294, precision @ rank n:0.2615, execution time: 0.008s
Isolation Forest ROC:0.9482, precision @ rank n:0.2615, execution time: 0.5201s
K Nearest Neighbors (KNN) ROC:0.7602, precision @ rank n:0.0462, execution time: 0.4752s
Local Outlier Factor (LOF) ROC:0.481, precision @ rank n:0.0462, execution time: 0.4374s
Minimum Covariance Determinant (MCD) ROC:0.8271, precision @ rank n:0.0615, execution time: 1.5094s
One-class SVM (OCSVM) ROC:0.93, precision @ rank n:0.2923, execution time: 0.7351s
Principal Component Analysis (PCA) ROC:0.9332, precision @ rank n:0.3385, execution time: 0.006s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6757, precision @ rank n:0.5106, execution time: 0.1134s
Cluster-based Local Outlier Factor ROC:0.684, precision @ rank n:0.4681, execution time: 0.0648s




Feature Bagging ROC:0.6446, precision @ rank n:0.4468, execution time: 0.0739s
Histogram-base Outlier Detection (HBOS) ROC:0.7169, precision @ rank n:0.5213, execution time: 0.002s
Isolation Forest ROC:0.6777, precision @ rank n:0.4787, execution time: 0.2598s
K Nearest Neighbors (KNN) ROC:0.7252, precision @ rank n:0.5106, execution time: 0.0259s
Local Outlier Factor (LOF) ROC:0.6604, precision @ rank n:0.4787, execution time: 0.0079s
Minimum Covariance Determinant (MCD) ROC:0.7047, precision @ rank n:0.4787, execution time: 0.0394s
One-class SVM (OCSVM) ROC:0.6423, precision @ rank n:0.4574, execution time: 0.011s
Principal Component Analysis (PCA) ROC:0.6639, precision @ rank n:0.5, execution time: 0.002s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5681, precision @ rank n:0.3918, execution time: 1.4369s
Cluster-based Local Outlier Factor ROC:0.7234, precision @ rank n:0.5574, execution time: 0.1659s




Feature Bagging ROC:0.557, precision @ rank n:0.4051, execution time: 5.5872s
Histogram-base Outlier Detection (HBOS) ROC:0.7393, precision @ rank n:0.5466, execution time: 0.014s
Isolation Forest ROC:0.7094, precision @ rank n:0.578, execution time: 0.6047s
K Nearest Neighbors (KNN) ROC:0.6781, precision @ rank n:0.4994, execution time: 0.7943s
Local Outlier Factor (LOF) ROC:0.5551, precision @ rank n:0.4051, execution time: 0.735s
Minimum Covariance Determinant (MCD) ROC:0.792, precision @ rank n:0.6747, execution time: 1.5249s
One-class SVM (OCSVM) ROC:0.636, precision @ rank n:0.5224, execution time: 1.0042s
Principal Component Analysis (PCA) ROC:0.5783, precision @ rank n:0.4559, execution time: 0.0169s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.86, precision @ rank n:0.2593, execution time: 1.2684s
Cluster-based Local Outlier Factor ROC:0.9987, precision @ rank n:0.8889, execution time: 0.1961s




Feature Bagging ROC:0.4971, precision @ rank n:0.0741, execution time: 4.7131s
Histogram-base Outlier Detection (HBOS) ROC:0.9837, precision @ rank n:0.5926, execution time: 0.014s
Isolation Forest ROC:0.9973, precision @ rank n:0.8889, execution time: 0.5321s
K Nearest Neighbors (KNN) ROC:0.9505, precision @ rank n:0.3704, execution time: 0.6519s
Local Outlier Factor (LOF) ROC:0.5006, precision @ rank n:0.0741, execution time: 0.567s
Minimum Covariance Determinant (MCD) ROC:0.9946, precision @ rank n:0.5185, execution time: 1.3789s
One-class SVM (OCSVM) ROC:0.9976, precision @ rank n:0.9259, execution time: 0.8521s
Principal Component Analysis (PCA) ROC:0.9841, precision @ rank n:0.8519, execution time: 0.0139s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6186, precision @ rank n:0.1918, execution time: 11.5157s




Cluster-based Local Outlier Factor ROC:0.6286, precision @ rank n:0.2336, execution time: 0.4772s
Feature Bagging ROC:0.5211, precision @ rank n:0.111, execution time: 39.222s
Histogram-base Outlier Detection (HBOS) ROC:0.9851, precision @ rank n:0.9857, execution time: 0.0146s
Isolation Forest ROC:0.9972, precision @ rank n:0.9337, execution time: 2.492s
K Nearest Neighbors (KNN) ROC:0.645, precision @ rank n:0.2199, execution time: 6.5102s
Local Outlier Factor (LOF) ROC:0.5347, precision @ rank n:0.1406, execution time: 8.8355s






Minimum Covariance Determinant (MCD) ROC:0.9903, precision @ rank n:0.7534, execution time: 9.5238s
One-class SVM (OCSVM) ROC:0.9922, precision @ rank n:0.9553, execution time: 32.8611s
Principal Component Analysis (PCA) ROC:0.9902, precision @ rank n:0.9503, execution time: 0.0299s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2797, precision @ rank n:0.0, execution time: 0.0399s
Cluster-based Local Outlier Factor ROC:0.3908, precision @ rank n:0.0, execution time: 0.0419s
Feature Bagging ROC:0.3027, precision @ rank n:0.0, execution time: 0.0309s




Histogram-base Outlier Detection (HBOS) ROC:0.2695, precision @ rank n:0.0, execution time: 0.001s
Isolation Forest ROC:0.3576, precision @ rank n:0.0, execution time: 0.2495s
K Nearest Neighbors (KNN) ROC:0.318, precision @ rank n:0.0, execution time: 0.008s
Local Outlier Factor (LOF) ROC:0.318, precision @ rank n:0.0, execution time: 0.002s
Minimum Covariance Determinant (MCD) ROC:0.3308, precision @ rank n:0.0, execution time: 0.0329s
One-class SVM (OCSVM) ROC:0.4087, precision @ rank n:0.0, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.3397, precision @ rank n:0.0, execution time: 0.001s

... Processing vowels.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9521, precision @ rank n:0.4706, execution time: 0.2508s
Cluster-based Local Outlier Factor ROC:0.9278, precision @ rank n:0.4118, execution time: 0.0608s




Feature Bagging ROC:0.9385, precision @ rank n:0.3529, execution time: 0.2771s
Histogram-base Outlier Detection (HBOS) ROC:0.6758, precision @ rank n:0.1765, execution time: 0.004s
Isolation Forest ROC:0.7469, precision @ rank n:0.1176, execution time: 0.2937s
K Nearest Neighbors (KNN) ROC:0.9568, precision @ rank n:0.5294, execution time: 0.0593s
Local Outlier Factor (LOF) ROC:0.9345, precision @ rank n:0.4118, execution time: 0.0259s
Minimum Covariance Determinant (MCD) ROC:0.6779, precision @ rank n:0.0, execution time: 0.5807s
One-class SVM (OCSVM) ROC:0.7415, precision @ rank n:0.2941, execution time: 0.0369s
Principal Component Analysis (PCA) ROC:0.5787, precision @ rank n:0.1176, execution time: 0.003s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9232, precision @ rank n:0.3, execution time: 0.0798s
Cluster-based Local Outlier Factor ROC:0.9063, precision @ rank n:0.6, execution time: 0.0408s




Feature Bagging ROC:0.9415, precision @ rank n:0.5, execution time: 0.0838s
Histogram-base Outlier Detection (HBOS) ROC:0.9592, precision @ rank n:0.7, execution time: 0.0104s
Isolation Forest ROC:0.9451, precision @ rank n:0.5, execution time: 0.2725s
K Nearest Neighbors (KNN) ROC:0.9437, precision @ rank n:0.5, execution time: 0.015s
Local Outlier Factor (LOF) ROC:0.9352, precision @ rank n:0.4, execution time: 0.005s
Minimum Covariance Determinant (MCD) ROC:0.8986, precision @ rank n:0.4, execution time: 0.0588s
One-class SVM (OCSVM) ROC:0.9408, precision @ rank n:0.5, execution time: 0.007s
Principal Component Analysis (PCA) ROC:0.9324, precision @ rank n:0.6, execution time: 0.003s


In [20]:
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7687,0.7684,0.7799,0.8511,0.8478,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5763,0.8221,0.4879,0.8453,0.9316,0.6959,0.4715,0.8778,0.9507,0.9638
0,glass,214,9,4.2056,0.7104,0.8506,0.7043,0.6524,0.7195,0.7805,0.7774,0.7165,0.6189,0.622
0,ionosphere,351,33,35.8974,0.9004,0.8952,0.8933,0.5195,0.8294,0.9134,0.8989,0.9399,0.8372,0.7971
0,letter,1600,32,6.25,0.8465,0.7423,0.866,0.5728,0.5836,0.845,0.8409,0.7499,0.5744,0.48
0,lympho,148,18,4.0541,0.9382,0.9709,0.9673,0.9964,0.9855,0.9636,0.9636,0.9164,0.9636,0.9818
0,mnist,7603,100,9.2069,0.7813,0.8447,0.7259,0.5675,0.7813,0.8409,0.7085,0.863,0.8417,0.8396
0,musk,3062,166,3.1679,0.0809,1.0,0.5228,0.9999,0.9992,0.7348,0.5323,1.0,1.0,1.0
0,arrhythmia,452,274,14.6018,0.7687,0.7684,0.7799,0.8511,0.8478,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.5763,0.8221,0.4879,0.8453,0.9316,0.6959,0.4715,0.8778,0.9507,0.9638


In [21]:
prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.5,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1875,0.4844,0.1406,0.4688,0.4531,0.2812,0.125,0.3906,0.5938,0.6875
0,glass,214,9,4.2056,0.25,0.25,0.25,0.0,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosphere,351,33,35.8974,0.8214,0.8036,0.75,0.3393,0.6607,0.8393,0.75,0.8571,0.7143,0.5893
0,letter,1600,32,6.25,0.275,0.175,0.4,0.125,0.05,0.3,0.325,0.075,0.1,0.05
0,lympho,148,18,4.0541,0.4,0.6,0.6,0.8,0.6,0.6,0.6,0.6,0.6,0.8
0,mnist,7603,100,9.2069,0.3562,0.4007,0.3664,0.1199,0.3116,0.4144,0.339,0.3973,0.3801,0.3767
0,musk,3062,166,3.1679,0.0333,1.0,0.1667,0.9667,0.9,0.2333,0.1333,0.9667,1.0,1.0
0,arrhythmia,452,274,14.6018,0.3571,0.4643,0.5,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.1875,0.4844,0.1406,0.4688,0.4531,0.2812,0.125,0.3906,0.5938,0.6875


In [22]:
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.1237,0.0642,0.5003,0.0504,0.3408,0.0699,0.0598,0.4356,0.035,0.041
0,cardio,1831,21,9.6122,0.2991,0.1002,0.6423,0.005,0.31,0.1171,0.076,0.3576,0.055,0.007
0,glass,214,9,4.2056,0.0292,0.0312,0.0269,0.002,0.2443,0.0072,0.002,0.0299,0.001,0.001
0,ionosphere,351,33,35.8974,0.0499,0.0364,0.0668,0.011,0.2773,0.013,0.005,0.0529,0.006,0.003
0,letter,1600,32,6.25,0.3221,0.0728,0.5874,0.008,0.3451,0.1147,0.0738,0.8378,0.0648,0.006
0,lympho,148,18,4.0541,0.0339,0.0369,0.0309,0.007,0.2634,0.006,0.002,0.0269,0.001,0.002
0,mnist,7603,100,9.2069,5.7512,0.4278,39.5301,0.0468,1.7603,4.9953,4.771,1.8654,3.4817,0.1267
0,musk,3062,166,3.1679,1.7287,0.2004,10.2688,0.049,1.0071,1.3801,1.2728,8.3789,0.94,0.1097
0,arrhythmia,452,274,14.6018,0.1197,0.0598,0.4857,0.0499,0.3271,0.0718,0.0618,0.4926,0.0369,0.0409
0,cardio,1831,21,9.6122,0.341,0.1032,0.6712,0.006,0.392,0.1233,0.0846,0.486,0.0778,0.003
