# Benchmark of various outlier detection model thresholders

### The models are evaluated by ROC, Precision @ n and execution time on 17 benchmark datasets. All datasets are split (60% for training and 40% for testing). The full result by averaging 10 indepent trials can be found [here]

The thresholders covered in this example include:

1. **IQR: Inter-Quartile Region** 
2. **MAD: Median Absolute Deviation**
3. **FWFM: Full Width at Full Minimum**
4. **YJ: Yeo-Johnson Transformation**
5. **KMEANS: Kmeans Clustering**
6. **ZSCORE: Z Score**
7. **AUCP: Area Under the Curve Percentage**
8. **QMCD: Quasi-Monte Carlo Discreperancy**
9. **FGD: Fixed Gradient Descent**
10. **DSN: Distance Shift from Normal**
11. **CLF: Trained Classifier**
12. **FILTER: Filtering Based**
13. **WIND: Topological Winding Number**
14. **EB: Elliptical Boundary**
15. **REGR: Regression Intercept**
16. **BOOT: Bootstrap Method**
17. **MCST: Monte Carlo Statistical Tests**
18. **Histogram Based Methods**
19. **Mollifier**
20. **Chauvenet's Criterion**
21. **Generalized Extreme Studentized Deviate**
22. **Modified Thompson Tau Test**
23. **Mean Shift Clustering**

In [1]:
from __future__ import division
from __future__ import print_function

import os
import sys
from time import time

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

from pyod.models.knn import KNN
from pyod.models.iforest import IForest

from pythresh.thresholds.iqr import IQR
from pythresh.thresholds.mad import MAD
from pythresh.thresholds.fwfm import FWFM
from pythresh.thresholds.yj import YJ
from pythresh.thresholds.kmeans import KMEANS
from pythresh.thresholds.zscore import ZSCORE
from pythresh.thresholds.aucp import AUCP
from pythresh.thresholds.qmcd import QMCD
from pythresh.thresholds.fgd import FGD
from pythresh.thresholds.dsn import DSN
from pythresh.thresholds.clf import CLF
from pythresh.thresholds.filter import FILTER
from pythresh.thresholds.wind import WIND
from pythresh.thresholds.eb import EB
from pythresh.thresholds.regr import REGR
from pythresh.thresholds.boot import BOOT
from pythresh.thresholds.mcst import MCST
from pythresh.thresholds.hist import HIST
from pythresh.thresholds.moll import MOLL
from pythresh.thresholds.chau import CHAU
from pythresh.thresholds.gesd import GESD
from pythresh.thresholds.mtt import MTT
from pythresh.thresholds.shift import SHIFT
from pythresh.thresholds.karch import KARCH

from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

INFO: Using numpy backend


In [3]:
# Define data file and read X and y
mat_file_list = ['arrhythmia.mat',
                 'cardio.mat',
                 'glass.mat',
                 'ionosphere.mat',
                 'letter.mat',
                 'lympho.mat',
                 'mnist.mat',
                 'musk.mat',
                 'optdigits.mat',
                 'pendigits.mat',
                 'pima.mat',
                 'satellite.mat',
                 'satimage-2.mat',
                 'vertebral.mat',
                 'vowels.mat',
                 'wbc.mat']

# Define nine outlier detection tools to be compared
random_state = np.random.RandomState(42)

df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc', 'IQR', 'MAD', 'FWFM', 
              'YJ', 'KMEANS', 'ZSCORE', 'AUCP', 'QMCD', 'FGD', 'DSN', 'CLF', 'FILTER', 'WIND', 
              'EB', 'REGR', 'BOOT', 'MCST', 'HIST', 'MOLL', 'CHAU', 'GESD', 'MTT', 'SHIFT', 'KARCH']
roc_df = pd.DataFrame(columns=df_columns)
prn_df = pd.DataFrame(columns=df_columns)
time_df = pd.DataFrame(columns=df_columns)

clf = IForest()


for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                        random_state=random_state)

    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    thresholders = {
        'Inter-Quartile Region (IQR)':IQR(),
        'Median Absolute Deviation (MAD)':MAD(),
        'Full Width at Full Minimum (FWFM)':FWFM(),
        'Yeo-Johnson Transformation (YJ)': YJ(),
        'Kmeans Clustering (KMEANS)': KMEANS(),
        'Z Score (ZSCORE)': ZSCORE(),
        'AUC Percentage (AUCP)': AUCP(),
        'Quasi-Monte Carlo Discreperancy (QMCD)': QMCD(),
        'Fixed Gradient Descent (FGD)': FGD(),
        'Distance Shift from Normal (DSN)': DSN(),
        'Trained Classifier (CLF)': CLF(),
        'Filtering Based (FILTER)': FILTER(),
        'Topological Winding Number (WIND)': WIND(),
        'Elliptical Boundary (EB)': EB(),
        'Regression Intercept (REGR)': REGR(),
        'Bootstrap Method (BOOT)': BOOT(),
        'Monte Carlo Statistical Tests (MCST)': MCST(),
        'Histogram Based Methods (HIST)': HIST(),
        'Mollifier (MOLL)': MOLL(),
        "Chauvenet's Criterion (CHAU)": CHAU(),
        'Generalized Extreme Studentized Deviate (GESD)': GESD(),
        'Modified Thompson Tau Test (MTT)': MTT(),
        'Mean Shift Clustering (SHIFT)': SHIFT(),
        'Karcher Mean (KARCH)': KARCH()
    }
    
    clf.fit(X_train_norm)
    scores = clf.decision_scores_
    
    for thres_name, thres in thresholders.items():
        t0 = time()
        pred = thres.eval(scores)
        contam = np.sum(pred)/len(pred)
        
        if contam<=0: contam=1e-3
        if contam>0.5: contam=0.5
        
        clf = IForest(contamination=contam)
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{thres_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
        thres_name=thres_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)

    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing arrhythmia.mat ...
Inter-Quartile Region (IQR) ROC:0.8175, precision @ rank n:0.5714, execution time: 0.8029s
Median Absolute Deviation (MAD) ROC:0.8228, precision @ rank n:0.4643, execution time: 0.8186s
Full Width at Full Minimum (FWFM) ROC:0.817, precision @ rank n:0.5714, execution time: 0.7547s
Yeo-Johnson Transformation (YJ) ROC:0.8564, precision @ rank n:0.6071, execution time: 1.1197s
Kmeans Clustering (KMEANS) ROC:0.8471, precision @ rank n:0.4643, execution time: 0.7626s
Z Score (ZSCORE) ROC:0.8448, precision @ rank n:0.5714, execution time: 0.7298s
AUC Percentage (AUCP) ROC:0.8483, precision @ rank n:0.5714, execution time: 0.759s
Quasi-Monte Carlo Discreperancy (QMCD) ROC:0.841, precision @ rank n:0.5714, execution time: 0.7257s
Fixed Gradient Descent (FGD) ROC:0.8429, precision @ rank n:0.5357, execution time: 0.7383s
Distance Shift from Normal (DSN) ROC:0.8597, precision @ rank n:0.5714, execution time: 0.8597s
Trained Classifier (CLF) ROC:0.8336, precisio

Histogram Based Methods (HIST) ROC:0.784, precision @ rank n:0.5435, execution time: 0.5561s
Mollifier (MOLL) ROC:0.7808, precision @ rank n:0.5217, execution time: 0.5688s
Chauvenet's Criterion (CHAU) ROC:0.7762, precision @ rank n:0.587, execution time: 0.5654s
Generalized Extreme Studentized Deviate (GESD) ROC:0.7881, precision @ rank n:0.5435, execution time: 0.6209s
Modified Thompson Tau Test (MTT) ROC:0.7792, precision @ rank n:0.587, execution time: 0.6133s
Mean Shift Clustering (SHIFT) ROC:0.7773, precision @ rank n:0.5435, execution time: 1.4104s
Karcher Mean (KARCH) ROC:0.7595, precision @ rank n:0.5, execution time: 0.559s

... Processing letter.mat ...
Inter-Quartile Region (IQR) ROC:0.6078, precision @ rank n:0.122, execution time: 0.7148s
Median Absolute Deviation (MAD) ROC:0.6239, precision @ rank n:0.0244, execution time: 0.7262s
Full Width at Full Minimum (FWFM) ROC:0.6165, precision @ rank n:0.0976, execution time: 0.8059s
Yeo-Johnson Transformation (YJ) ROC:0.6678, p

Filtering Based (FILTER) ROC:1.0, precision @ rank n:1.0, execution time: 1.6602s
Topological Winding Number (WIND) ROC:1.0, precision @ rank n:1.0, execution time: 2.1638s
Elliptical Boundary (EB) ROC:1.0, precision @ rank n:1.0, execution time: 2.3471s
Regression Intercept (REGR) ROC:0.9997, precision @ rank n:0.9512, execution time: 1.9308s
Bootstrap Method (BOOT) ROC:1.0, precision @ rank n:0.9756, execution time: 2.2521s
Monte Carlo Statistical Tests (MCST) ROC:1.0, precision @ rank n:1.0, execution time: 2.0391s
Histogram Based Methods (HIST) ROC:0.9998, precision @ rank n:0.9512, execution time: 1.6211s
Mollifier (MOLL) ROC:0.9998, precision @ rank n:0.9512, execution time: 1.5954s
Chauvenet's Criterion (CHAU) ROC:0.9985, precision @ rank n:0.9024, execution time: 1.5764s
Generalized Extreme Studentized Deviate (GESD) ROC:1.0, precision @ rank n:0.9756, execution time: 2.1543s
Modified Thompson Tau Test (MTT) ROC:1.0, precision @ rank n:1.0, execution time: 1.6618s
Mean Shift Cl

Kmeans Clustering (KMEANS) ROC:0.7263, precision @ rank n:0.5961, execution time: 1.3393s
Z Score (ZSCORE) ROC:0.6969, precision @ rank n:0.5628, execution time: 1.373s
AUC Percentage (AUCP) ROC:0.6821, precision @ rank n:0.5862, execution time: 2.894s
Quasi-Monte Carlo Discreperancy (QMCD) ROC:0.7271, precision @ rank n:0.6059, execution time: 1.4281s
Fixed Gradient Descent (FGD) ROC:0.6902, precision @ rank n:0.5554, execution time: 2.7034s
Distance Shift from Normal (DSN) ROC:0.6858, precision @ rank n:0.5628, execution time: 4.3827s
Trained Classifier (CLF) ROC:0.7053, precision @ rank n:0.5825, execution time: 1.3283s
Filtering Based (FILTER) ROC:0.7105, precision @ rank n:0.5874, execution time: 1.3409s
Topological Winding Number (WIND) ROC:0.6866, precision @ rank n:0.5567, execution time: 4.0085s
Elliptical Boundary (EB) ROC:0.6881, precision @ rank n:0.553, execution time: 3.2967s
Regression Intercept (REGR) ROC:0.7042, precision @ rank n:0.5542, execution time: 2.5812s
Bootst

Modified Thompson Tau Test (MTT) ROC:0.8037, precision @ rank n:0.2632, execution time: 0.7686s
Mean Shift Clustering (SHIFT) ROC:0.8136, precision @ rank n:0.3158, execution time: 12.7682s
Karcher Mean (KARCH) ROC:0.7881, precision @ rank n:0.2105, execution time: 0.8853s

... Processing wbc.mat ...
Inter-Quartile Region (IQR) ROC:0.9246, precision @ rank n:0.5556, execution time: 0.6501s
Median Absolute Deviation (MAD) ROC:0.9231, precision @ rank n:0.5556, execution time: 0.6314s
Full Width at Full Minimum (FWFM) ROC:0.913, precision @ rank n:0.5556, execution time: 0.6279s
Yeo-Johnson Transformation (YJ) ROC:0.9371, precision @ rank n:0.5556, execution time: 0.8238s
Kmeans Clustering (KMEANS) ROC:0.9301, precision @ rank n:0.5556, execution time: 0.6285s
Z Score (ZSCORE) ROC:0.9308, precision @ rank n:0.5556, execution time: 0.6127s
AUC Percentage (AUCP) ROC:0.9433, precision @ rank n:0.5556, execution time: 0.6506s
Quasi-Monte Carlo Discreperancy (QMCD) ROC:0.9091, precision @ ran

In [4]:
print('Time complexity')
time_df

Time complexity


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,IQR,MAD,FWFM,YJ,KMEANS,ZSCORE,...,REGR,BOOT,MCST,HIST,MOLL,CHAU,GESD,MTT,SHIFT,KARCH
0,arrhythmia,452,274,14.6018,0.8029,0.8186,0.7547,1.1197,0.7626,0.7298,...,0.7723,0.8926,0.8548,0.738,0.7444,0.7519,0.8498,0.7407,1.8985,0.7393
0,cardio,1831,21,9.6122,0.7034,0.7341,0.8101,1.355,0.7435,0.6992,...,0.9294,1.3687,0.8632,0.7655,0.7121,0.7231,1.0852,0.7495,7.5961,0.8762
0,glass,214,9,4.2056,0.5753,0.5432,0.5448,0.8493,0.5378,0.5369,...,0.5841,0.5563,0.5573,0.535,0.5405,0.5269,0.5777,0.5486,0.8851,0.537
0,ionosphere,351,33,35.8974,0.5683,0.5762,0.5861,0.8242,0.5865,0.5855,...,0.5855,0.6669,0.5782,0.5561,0.5688,0.5654,0.6209,0.6133,1.4104,0.559
0,letter,1600,32,6.25,0.7148,0.7262,0.8059,1.4674,0.732,0.7307,...,0.9921,1.0114,0.8938,0.7254,0.7379,1.0107,1.2034,0.7621,11.6471,0.8131
0,lympho,148,18,4.0541,0.5492,0.5945,0.647,0.7367,0.5364,0.5209,...,0.5482,0.576,0.5446,0.5691,0.6267,0.6076,0.5837,0.5496,0.991,0.5446
0,mnist,7603,100,9.2069,2.2465,2.3214,4.5107,5.4333,2.281,2.2618,...,3.7981,8.0063,4.6245,2.1663,2.2753,2.1395,3.5962,2.1912,47.728,3.0734
0,musk,3062,166,3.1679,1.568,1.512,1.8597,2.5021,1.5537,1.5633,...,1.9308,2.2521,2.0391,1.6211,1.5954,1.5764,2.1543,1.6618,22.7535,1.6892
0,optdigits,5216,64,2.8758,1.455,1.4558,2.4701,3.4967,1.5318,1.4845,...,2.4765,2.9729,2.8173,1.5682,1.5245,1.4757,2.4597,1.4945,41.1534,2.0215
0,pendigits,6870,16,2.2707,1.2763,1.2037,3.0527,4.2825,1.2186,1.2666,...,2.9318,4.4499,3.7945,1.3755,1.2136,1.1855,2.4925,1.2301,44.4325,1.9667


In [5]:
print('ROC Performance')
roc_df

ROC Performance


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,IQR,MAD,FWFM,YJ,KMEANS,ZSCORE,...,REGR,BOOT,MCST,HIST,MOLL,CHAU,GESD,MTT,SHIFT,KARCH
0,arrhythmia,452,274,14.6018,0.8175,0.8228,0.817,0.8564,0.8471,0.8448,...,0.8352,0.8667,0.8361,0.8527,0.8504,0.8266,0.8329,0.8331,0.8445,0.8396
0,cardio,1831,21,9.6122,0.9253,0.9435,0.9484,0.9311,0.9167,0.902,...,0.9047,0.9073,0.9406,0.9109,0.9039,0.9115,0.9237,0.9219,0.9083,0.9155
0,glass,214,9,4.2056,0.6494,0.6667,0.6667,0.6691,0.6593,0.6691,...,0.642,0.6543,0.6568,0.6321,0.6123,0.6296,0.6296,0.6469,0.6346,0.6296
0,ionosphere,351,33,35.8974,0.7895,0.7968,0.7805,0.7881,0.781,0.7812,...,0.7632,0.773,0.7929,0.784,0.7808,0.7762,0.7881,0.7792,0.7773,0.7595
0,letter,1600,32,6.25,0.6078,0.6239,0.6165,0.6678,0.6281,0.6466,...,0.614,0.5822,0.6379,0.5873,0.5936,0.61,0.6088,0.6046,0.6133,0.6012
0,lympho,148,18,4.0541,1.0,0.9942,1.0,1.0,1.0,0.9942,...,0.9942,1.0,1.0,1.0,0.9942,1.0,0.9942,0.9942,1.0,1.0
0,mnist,7603,100,9.2069,0.807,0.8009,0.8104,0.7979,0.7763,0.7866,...,0.7994,0.7252,0.8226,0.8152,0.7973,0.7775,0.7966,0.7894,0.7655,0.8259
0,musk,3062,166,3.1679,1.0,1.0,1.0,0.9999,1.0,1.0,...,0.9997,1.0,1.0,0.9998,0.9998,0.9985,1.0,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.6982,0.6557,0.7943,0.6935,0.6841,0.6808,...,0.6124,0.7211,0.7167,0.7008,0.7687,0.7091,0.7916,0.7053,0.7067,0.727
0,pendigits,6870,16,2.2707,0.9336,0.9071,0.922,0.9326,0.9478,0.9479,...,0.937,0.9389,0.9348,0.934,0.9421,0.9449,0.9551,0.9331,0.9593,0.9431


In [6]:
print('Precision @ n Performance')
prn_df

Precision @ n Performance


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,IQR,MAD,FWFM,YJ,KMEANS,ZSCORE,...,REGR,BOOT,MCST,HIST,MOLL,CHAU,GESD,MTT,SHIFT,KARCH
0,arrhythmia,452,274,14.6018,0.5714,0.4643,0.5714,0.6071,0.4643,0.5714,...,0.5,0.6071,0.5,0.5714,0.5714,0.5357,0.5,0.5357,0.5,0.5357
0,cardio,1831,21,9.6122,0.5,0.6,0.5571,0.5429,0.4429,0.4714,...,0.4286,0.5143,0.5429,0.4714,0.4429,0.5,0.5286,0.4857,0.4714,0.5143
0,glass,214,9,4.2056,0.2,0.2,0.2,0.2,0.2,0.2,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2
0,ionosphere,351,33,35.8974,0.5652,0.587,0.5435,0.5217,0.5652,0.5217,...,0.5435,0.5217,0.587,0.5435,0.5217,0.587,0.5435,0.587,0.5435,0.5
0,letter,1600,32,6.25,0.122,0.0244,0.0976,0.0976,0.122,0.0732,...,0.0488,0.0488,0.0732,0.0488,0.0732,0.122,0.0732,0.0732,0.122,0.0488
0,lympho,148,18,4.0541,1.0,0.6667,1.0,1.0,1.0,0.6667,...,0.6667,1.0,1.0,1.0,0.6667,1.0,0.6667,0.6667,1.0,1.0
0,mnist,7603,100,9.2069,0.2926,0.3074,0.3037,0.3111,0.2185,0.237,...,0.2963,0.1889,0.3259,0.2889,0.2778,0.2185,0.2815,0.2778,0.2519,0.3333
0,musk,3062,166,3.1679,1.0,1.0,1.0,0.9512,1.0,1.0,...,0.9512,0.9756,1.0,0.9512,0.9512,0.9024,0.9756,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.0308,0.0308,0.0615,0.0154,0.0154,0.0308,...,0.0154,0.0154,0.0308,0.0462,0.0462,0.0,0.0462,0.0462,0.0462,0.0
0,pendigits,6870,16,2.2707,0.2903,0.2419,0.2581,0.2419,0.2903,0.2419,...,0.2419,0.2581,0.2258,0.2419,0.2258,0.2258,0.3226,0.1774,0.2742,0.2742
