# Benchmark of various outlier detection model thresholders

### The models are evaluated by ROC, Precision @ n and execution time on 16 benchmark datasets. All datasets are split (60% for training and 40% for testing). The full result by averaging 10 independent trials can be found [here]

The thresholders covered in this example include:

1. **IQR: Inter-Quartile Region** 
2. **MAD: Median Absolute Deviation**
3. **FWFM: Full Width at Full Minimum**
4. **YJ: Yeo-Johnson Transformation**
5. **KMEANS: Kmeans Clustering**
6. **ZSCORE: Z Score**
7. **AUCP: Area Under the Curve Percentage**
8. **QMCD: Quasi-Monte Carlo Discrepancy**
9. **FGD: Fixed Gradient Descent**
10. **DSN: Distance Shift from Normal**
11. **CLF: Trained Classifier**
12. **GF: Gaussian Filter**
13. **WIND: Topological Winding Number**
14. **EB: Elliptical Boundary**
15. **REGR: Regression Intercept**
16. **BOOT: Bootstrap Method**
17. **MCST: Monte Carlo Statistical Tests**
18. **HIST: Histogram Based Methods**
19. **MOLL: Mollifier**
20. **CHAU: Chauvenet's Criterion**
21. **GESD: Generalized Extreme Studentized Deviate**
22. **MTT: Modified Thompson Tau Test**
23. **SHIFT: Mean Shift Clustering**

In [1]:
from __future__ import division
from __future__ import print_function

import os
import sys
from time import time

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

from pyod.models.sampling import Sampling
from pyod.models.kde import KDE
from pyod.models.knn import KNN
from pyod.models.mcd import MCD
from pyod.models.iforest import IForest

from pythresh.thresholds.iqr import IQR
from pythresh.thresholds.mad import MAD
from pythresh.thresholds.fwfm import FWFM
from pythresh.thresholds.yj import YJ
from pythresh.thresholds.kmeans import KMEANS
from pythresh.thresholds.zscore import ZSCORE
from pythresh.thresholds.aucp import AUCP
from pythresh.thresholds.qmcd import QMCD
from pythresh.thresholds.fgd import FGD
from pythresh.thresholds.dsn import DSN
from pythresh.thresholds.clf import CLF
from pythresh.thresholds.gf import GF
from pythresh.thresholds.wind import WIND
from pythresh.thresholds.eb import EB
from pythresh.thresholds.regr import REGR
from pythresh.thresholds.boot import BOOT
from pythresh.thresholds.mcst import MCST
from pythresh.thresholds.hist import HIST
from pythresh.thresholds.moll import MOLL
from pythresh.thresholds.chau import CHAU
from pythresh.thresholds.gesd import GESD
from pythresh.thresholds.mtt import MTT
from pythresh.thresholds.shift import SHIFT

from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [2]:
# Benchmark datasets: each entry is a MATLAB file read for its 'X' and 'y'
mat_file_list = [stem + '.mat' for stem in (
    'arrhythmia',
    'cardio',
    'glass',
    'ionosphere',
    'letter',
    'lympho',
    'mnist',
    'musk',
    'optdigits',
    'pendigits',
    'pima',
    'satellite',
    'satimage-2',
    'vertebral',
    'vowels',
    'wbc',
)]

# Shared random state, handed to the train/test split of every dataset
random_state = np.random.RandomState(42)

# Result table layout: four dataset descriptors followed by one column
# per thresholder, in the order the thresholders are evaluated
df_columns = (
    ['Data', '#Samples', '# Dimensions', 'Outlier Perc']
    + ['IQR', 'MAD', 'FWFM', 'YJ', 'KMEANS', 'ZSCORE', 'AUCP', 'QMCD']
    + ['FGD', 'DSN', 'CLF', 'GF', 'WIND', 'EB', 'REGR', 'BOOT', 'MCST']
    + ['HIST', 'MOLL', 'CHAU', 'GESD', 'MTT', 'SHIFT']
)

# One empty table per reported metric (ROC, precision @ n, execution time)
roc_df, prn_df, time_df = (
    pd.DataFrame(columns=df_columns) for _ in range(3)
)

# Detector that produces the outlier scores handed to the thresholders
clf = IForest()


# Benchmark loop. For every dataset: fit the base detector on the training
# split, let each thresholder label the training scores, turn the share of
# flagged samples into a contamination value, refit a detector with that
# contamination and score the held-out split. One row per dataset is then
# appended to each of time_df, roc_df and prn_df.
for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # Every result row starts with the same dataset description
    # (file name without the '.mat' suffix, sample count, dimensions,
    # outlier percentage); each metric gets its own copy to extend.
    dataset_info = [mat_file[:-4], X.shape[0], X.shape[1],
                    outliers_percentage]
    roc_list = list(dataset_info)
    prn_list = list(dataset_info)
    time_list = list(dataset_info)

    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    # Fresh thresholder instances for each dataset, keyed by display name.
    # The insertion order must match the thresholder columns of df_columns.
    thresholders = {
        'Inter-Quartile Region (IQR)': IQR(),
        'Median Absolute Deviation (MAD)': MAD(),
        'Full Width at Full Minimum (FWFM)': FWFM(),
        'Yeo-Johnson Transformation (YJ)': YJ(),
        'Kmeans Clustering (KMEANS)': KMEANS(),
        'Z Score (ZSCORE)': ZSCORE(),
        'AUC Percentage (AUCP)': AUCP(),
        'Quasi-Monte Carlo Discreperancy (QMCD)': QMCD(),
        'Fixed Gradient Descent (FGD)': FGD(),
        'Distance Shift from Normal (DSN)': DSN(),
        'Trained Classifier (CLF)': CLF(),
        'Gaussian Filter (GF)': GF(),
        'Topological Winding Number (WIND)': WIND(),
        'Elliptical Boundary (EB)': EB(),
        'Regression Intercept (REGR)': REGR(),
        'Bootstrap Method (BOOT)': BOOT(),
        'Monte Carlo Statistical Tests (MCST)': MCST(),
        'Histogram Based Methods (HIST)': HIST(),
        'Mollifier (MOLL)': MOLL(),
        "Chauvenet's Criterion (CHAU)": CHAU(),
        'Generalized Extreme Studentized Deviate (GESD)': GESD(),
        'Modified Thompson Tau Test (MTT)': MTT(),
        'Mean Shift Clustering (SHIFT)': SHIFT(),
    }

    # Fit the shared base detector on this dataset's training split. The
    # per-thresholder detector below is bound to its own name so that this
    # base `clf` is never replaced by a model configured with the
    # contamination of a previous dataset's last thresholder.
    clf.fit(X_train_norm)
    scores = clf.decision_scores_

    for thres_name, thres in thresholders.items():
        # The timed span covers thresholding, refitting and test scoring
        t0 = time()
        pred = thres.eval(scores)
        contam = np.sum(pred) / len(pred)

        # Keep the estimate usable as a contamination value: replace a
        # non-positive share by a small positive one and cap it at 0.5
        if contam <= 0:
            contam = 1e-3
        elif contam > 0.5:
            contam = 0.5

        thres_clf = IForest(contamination=contam)
        thres_clf.fit(X_train_norm)
        test_scores = thres_clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        # NOTE(review): both metrics are computed from the raw test scores,
        # not from thresholded labels. If contamination only moves the
        # decision threshold, the differences between thresholders may
        # mostly reflect run-to-run randomness of the unseeded forest
        # rather than the thresholder itself - TODO confirm.
        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{thres_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
        thres_name=thres_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    # Append this dataset's row to each result table. ignore_index gives
    # the rows a running index instead of labelling every row 0.
    time_df = pd.concat(
        [time_df, pd.DataFrame([time_list], columns=df_columns)],
        axis=0, ignore_index=True)
    roc_df = pd.concat(
        [roc_df, pd.DataFrame([roc_list], columns=df_columns)],
        axis=0, ignore_index=True)
    prn_df = pd.concat(
        [prn_df, pd.DataFrame([prn_list], columns=df_columns)],
        axis=0, ignore_index=True)


... Processing arrhythmia.mat ...
Inter-Quartile Region (IQR) ROC:0.8275, precision @ rank n:0.5714, execution time: 0.7727s
Median Absolute Deviation (MAD) ROC:0.8497, precision @ rank n:0.4643, execution time: 0.782s
Full Width at Full Minimum (FWFM) ROC:0.8494, precision @ rank n:0.6071, execution time: 0.7227s
Yeo-Johnson Transformation (YJ) ROC:0.8789, precision @ rank n:0.5714, execution time: 1.0575s
Kmeans Clustering (KMEANS) ROC:0.8156, precision @ rank n:0.5, execution time: 0.7203s
Z Score (ZSCORE) ROC:0.8457, precision @ rank n:0.5357, execution time: 0.7161s
AUC Percentage (AUCP) ROC:0.8357, precision @ rank n:0.5357, execution time: 0.7473s
Quasi-Monte Carlo Discreperancy (QMCD) ROC:0.8343, precision @ rank n:0.5357, execution time: 0.7124s
Fixed Gradient Descent (FGD) ROC:0.8508, precision @ rank n:0.6071, execution time: 0.72s
Distance Shift from Normal (DSN) ROC:0.848, precision @ rank n:0.5714, execution time: 0.7253s
Trained Classifier (CLF) ROC:0.8298, precision @ 

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.8541, precision @ rank n:0.5357, execution time: 0.7216s
Generalized Extreme Studentized Deviate (GESD) ROC:0.8494, precision @ rank n:0.5357, execution time: 0.7948s
Modified Thompson Tau Test (MTT) ROC:0.8371, precision @ rank n:0.5357, execution time: 0.7226s
Mean Shift Clustering (SHIFT) ROC:0.8529, precision @ rank n:0.5, execution time: 1.8228s

... Processing cardio.mat ...
Inter-Quartile Region (IQR) ROC:0.9176, precision @ rank n:0.4571, execution time: 0.6833s
Median Absolute Deviation (MAD) ROC:0.8889, precision @ rank n:0.4143, execution time: 0.6936s
Full Width at Full Minimum (FWFM) ROC:0.9275, precision @ rank n:0.4571, execution time: 0.7983s
Yeo-Johnson Transformation (YJ) ROC:0.9215, precision @ rank n:0.4286, execution time: 1.3049s
Kmeans Clustering (KMEANS) ROC:0.9275, precision @ rank n:0.5286, execution time: 0.6973s
Z Score (ZSCORE) ROC:0.9274, precision @ rank n:0.5, execution time: 0.6891s
AUC Percentage (AUCP) ROC:0.9194, pr

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.9092, precision @ rank n:0.4857, execution time: 0.6911s
Generalized Extreme Studentized Deviate (GESD) ROC:0.9251, precision @ rank n:0.5143, execution time: 1.0267s
Modified Thompson Tau Test (MTT) ROC:0.9191, precision @ rank n:0.4429, execution time: 0.7158s
Mean Shift Clustering (SHIFT) ROC:0.9248, precision @ rank n:0.5143, execution time: 6.5442s

... Processing glass.mat ...
Inter-Quartile Region (IQR) ROC:0.6469, precision @ rank n:0.2, execution time: 0.4993s
Median Absolute Deviation (MAD) ROC:0.6593, precision @ rank n:0.2, execution time: 0.5082s
Full Width at Full Minimum (FWFM) ROC:0.6593, precision @ rank n:0.2, execution time: 0.5114s
Yeo-Johnson Transformation (YJ) ROC:0.6519, precision @ rank n:0.2, execution time: 0.7181s
Kmeans Clustering (KMEANS) ROC:0.6519, precision @ rank n:0.2, execution time: 0.5148s
Z Score (ZSCORE) ROC:0.6346, precision @ rank n:0.2, execution time: 0.5079s
AUC Percentage (AUCP) ROC:0.6494, precision @ ran

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.6741, precision @ rank n:0.2, execution time: 0.526s
Generalized Extreme Studentized Deviate (GESD) ROC:0.6642, precision @ rank n:0.2, execution time: 0.549s
Modified Thompson Tau Test (MTT) ROC:0.684, precision @ rank n:0.2, execution time: 0.5109s
Mean Shift Clustering (SHIFT) ROC:0.6296, precision @ rank n:0.2, execution time: 0.8418s

... Processing ionosphere.mat ...
Inter-Quartile Region (IQR) ROC:0.786, precision @ rank n:0.5652, execution time: 0.5867s
Median Absolute Deviation (MAD) ROC:0.7783, precision @ rank n:0.5435, execution time: 0.5797s
Full Width at Full Minimum (FWFM) ROC:0.7707, precision @ rank n:0.5435, execution time: 0.5849s
Yeo-Johnson Transformation (YJ) ROC:0.7805, precision @ rank n:0.5652, execution time: 0.8081s
Kmeans Clustering (KMEANS) ROC:0.7817, precision @ rank n:0.5652, execution time: 0.5532s
Z Score (ZSCORE) ROC:0.7835, precision @ rank n:0.5435, execution time: 0.5471s
AUC Percentage (AUCP) ROC:0.7835, precisio

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.7787, precision @ rank n:0.5435, execution time: 0.5499s
Generalized Extreme Studentized Deviate (GESD) ROC:0.7746, precision @ rank n:0.5217, execution time: 0.6161s
Modified Thompson Tau Test (MTT) ROC:0.7897, precision @ rank n:0.587, execution time: 0.5844s
Mean Shift Clustering (SHIFT) ROC:0.7801, precision @ rank n:0.5652, execution time: 1.6737s

... Processing letter.mat ...
Inter-Quartile Region (IQR) ROC:0.6872, precision @ rank n:0.122, execution time: 0.737s
Median Absolute Deviation (MAD) ROC:0.5659, precision @ rank n:0.0732, execution time: 0.7653s
Full Width at Full Minimum (FWFM) ROC:0.5883, precision @ rank n:0.0488, execution time: 0.8262s
Yeo-Johnson Transformation (YJ) ROC:0.604, precision @ rank n:0.0732, execution time: 1.2038s
Kmeans Clustering (KMEANS) ROC:0.5743, precision @ rank n:0.0732, execution time: 0.7156s
Z Score (ZSCORE) ROC:0.6082, precision @ rank n:0.0732, execution time: 0.7073s
AUC Percentage (AUCP) ROC:0.6318, 

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.6181, precision @ rank n:0.0488, execution time: 0.7989s
Generalized Extreme Studentized Deviate (GESD) ROC:0.6348, precision @ rank n:0.0976, execution time: 0.9934s
Modified Thompson Tau Test (MTT) ROC:0.6063, precision @ rank n:0.0976, execution time: 0.7085s
Mean Shift Clustering (SHIFT) ROC:0.6357, precision @ rank n:0.0732, execution time: 12.5889s

... Processing lympho.mat ...
Inter-Quartile Region (IQR) ROC:1.0, precision @ rank n:1.0, execution time: 0.5015s
Median Absolute Deviation (MAD) ROC:0.9942, precision @ rank n:0.6667, execution time: 0.521s
Full Width at Full Minimum (FWFM) ROC:0.9942, precision @ rank n:0.6667, execution time: 0.524s
Yeo-Johnson Transformation (YJ) ROC:0.9942, precision @ rank n:0.6667, execution time: 0.8475s
Kmeans Clustering (KMEANS) ROC:1.0, precision @ rank n:1.0, execution time: 0.5093s
Z Score (ZSCORE) ROC:1.0, precision @ rank n:1.0, execution time: 0.5048s
AUC Percentage (AUCP) ROC:0.9942, precision @ ran

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.9942, precision @ rank n:0.6667, execution time: 0.509s
Generalized Extreme Studentized Deviate (GESD) ROC:1.0, precision @ rank n:1.0, execution time: 0.5305s
Modified Thompson Tau Test (MTT) ROC:1.0, precision @ rank n:1.0, execution time: 0.505s
Mean Shift Clustering (SHIFT) ROC:1.0, precision @ rank n:1.0, execution time: 0.8564s

... Processing mnist.mat ...
Inter-Quartile Region (IQR) ROC:0.8311, precision @ rank n:0.3481, execution time: 2.156s
Median Absolute Deviation (MAD) ROC:0.745, precision @ rank n:0.1926, execution time: 2.0798s
Full Width at Full Minimum (FWFM) ROC:0.8056, precision @ rank n:0.3037, execution time: 4.2423s
Yeo-Johnson Transformation (YJ) ROC:0.8235, precision @ rank n:0.3556, execution time: 5.2298s
Kmeans Clustering (KMEANS) ROC:0.8007, precision @ rank n:0.2926, execution time: 2.0935s
Z Score (ZSCORE) ROC:0.8012, precision @ rank n:0.3185, execution time: 2.0807s
AUC Percentage (AUCP) ROC:0.8145, precision @ rank n:

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.7793, precision @ rank n:0.2704, execution time: 2.0846s
Generalized Extreme Studentized Deviate (GESD) ROC:0.7812, precision @ rank n:0.2481, execution time: 3.5197s
Modified Thompson Tau Test (MTT) ROC:0.7938, precision @ rank n:0.2963, execution time: 2.1343s
Mean Shift Clustering (SHIFT) ROC:0.7825, precision @ rank n:0.2519, execution time: 50.7764s

... Processing musk.mat ...
Inter-Quartile Region (IQR) ROC:0.9999, precision @ rank n:0.9512, execution time: 1.4499s
Median Absolute Deviation (MAD) ROC:1.0, precision @ rank n:1.0, execution time: 1.4816s
Full Width at Full Minimum (FWFM) ROC:0.9995, precision @ rank n:0.9512, execution time: 1.8642s
Yeo-Johnson Transformation (YJ) ROC:0.9977, precision @ rank n:0.8537, execution time: 2.9819s
Kmeans Clustering (KMEANS) ROC:0.9997, precision @ rank n:0.9268, execution time: 1.5326s
Z Score (ZSCORE) ROC:0.9999, precision @ rank n:0.9512, execution time: 1.5021s
AUC Percentage (AUCP) ROC:0.9999, pre

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:1.0, precision @ rank n:1.0, execution time: 1.459s
Generalized Extreme Studentized Deviate (GESD) ROC:0.9996, precision @ rank n:0.9268, execution time: 2.0173s
Modified Thompson Tau Test (MTT) ROC:1.0, precision @ rank n:0.9756, execution time: 1.4871s
Mean Shift Clustering (SHIFT) ROC:1.0, precision @ rank n:1.0, execution time: 27.3599s

... Processing optdigits.mat ...
Inter-Quartile Region (IQR) ROC:0.7275, precision @ rank n:0.0308, execution time: 1.4293s
Median Absolute Deviation (MAD) ROC:0.7533, precision @ rank n:0.0615, execution time: 1.4123s
Full Width at Full Minimum (FWFM) ROC:0.7027, precision @ rank n:0.0154, execution time: 2.4325s
Yeo-Johnson Transformation (YJ) ROC:0.6954, precision @ rank n:0.0, execution time: 3.3058s
Kmeans Clustering (KMEANS) ROC:0.6977, precision @ rank n:0.0154, execution time: 1.494s
Z Score (ZSCORE) ROC:0.6746, precision @ rank n:0.0462, execution time: 1.4843s
AUC Percentage (AUCP) ROC:0.6679, precision @ 

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.7554, precision @ rank n:0.0462, execution time: 1.4128s
Generalized Extreme Studentized Deviate (GESD) ROC:0.7413, precision @ rank n:0.0, execution time: 2.4141s
Modified Thompson Tau Test (MTT) ROC:0.6703, precision @ rank n:0.0154, execution time: 1.4327s
Mean Shift Clustering (SHIFT) ROC:0.7081, precision @ rank n:0.0154, execution time: 42.5253s

... Processing pendigits.mat ...
Inter-Quartile Region (IQR) ROC:0.9202, precision @ rank n:0.2097, execution time: 1.1644s
Median Absolute Deviation (MAD) ROC:0.9359, precision @ rank n:0.2581, execution time: 1.2056s
Full Width at Full Minimum (FWFM) ROC:0.9629, precision @ rank n:0.3065, execution time: 2.8667s
Yeo-Johnson Transformation (YJ) ROC:0.9463, precision @ rank n:0.2742, execution time: 3.8452s
Kmeans Clustering (KMEANS) ROC:0.9617, precision @ rank n:0.3065, execution time: 1.1664s
Z Score (ZSCORE) ROC:0.9483, precision @ rank n:0.2258, execution time: 1.1558s
AUC Percentage (AUCP) ROC:0.9

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.9547, precision @ rank n:0.3548, execution time: 1.1646s
Generalized Extreme Studentized Deviate (GESD) ROC:0.9434, precision @ rank n:0.2581, execution time: 2.45s
Modified Thompson Tau Test (MTT) ROC:0.9255, precision @ rank n:0.2097, execution time: 1.228s
Mean Shift Clustering (SHIFT) ROC:0.9579, precision @ rank n:0.3387, execution time: 37.9711s

... Processing pima.mat ...
Inter-Quartile Region (IQR) ROC:0.7007, precision @ rank n:0.5664, execution time: 0.5609s
Median Absolute Deviation (MAD) ROC:0.7142, precision @ rank n:0.5664, execution time: 0.6093s
Full Width at Full Minimum (FWFM) ROC:0.698, precision @ rank n:0.5841, execution time: 0.5962s
Yeo-Johnson Transformation (YJ) ROC:0.692, precision @ rank n:0.5664, execution time: 1.0715s
Kmeans Clustering (KMEANS) ROC:0.697, precision @ rank n:0.5487, execution time: 0.5744s
Z Score (ZSCORE) ROC:0.7035, precision @ rank n:0.5664, execution time: 0.5673s
AUC Percentage (AUCP) ROC:0.6948, pre

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.7037, precision @ rank n:0.5752, execution time: 0.5739s
Generalized Extreme Studentized Deviate (GESD) ROC:0.7145, precision @ rank n:0.5841, execution time: 0.7072s
Modified Thompson Tau Test (MTT) ROC:0.714, precision @ rank n:0.5664, execution time: 0.581s
Mean Shift Clustering (SHIFT) ROC:0.7177, precision @ rank n:0.5664, execution time: 3.165s

... Processing satellite.mat ...
Inter-Quartile Region (IQR) ROC:0.6731, precision @ rank n:0.5333, execution time: 1.2871s
Median Absolute Deviation (MAD) ROC:0.6634, precision @ rank n:0.5517, execution time: 1.2897s
Full Width at Full Minimum (FWFM) ROC:0.7047, precision @ rank n:0.5702, execution time: 2.7541s
Yeo-Johnson Transformation (YJ) ROC:0.7007, precision @ rank n:0.5579, execution time: 3.7194s
Kmeans Clustering (KMEANS) ROC:0.7035, precision @ rank n:0.5788, execution time: 1.3018s
Z Score (ZSCORE) ROC:0.7071, precision @ rank n:0.5665, execution time: 1.2891s
AUC Percentage (AUCP) ROC:0.68

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.6946, precision @ rank n:0.5764, execution time: 1.3696s
Generalized Extreme Studentized Deviate (GESD) ROC:0.6702, precision @ rank n:0.5665, execution time: 2.5075s
Modified Thompson Tau Test (MTT) ROC:0.6863, precision @ rank n:0.5837, execution time: 1.4516s
Mean Shift Clustering (SHIFT) ROC:0.7027, precision @ rank n:0.569, execution time: 20.194s

... Processing satimage-2.mat ...
Inter-Quartile Region (IQR) ROC:0.9994, precision @ rank n:0.871, execution time: 1.2056s
Median Absolute Deviation (MAD) ROC:0.9995, precision @ rank n:0.9355, execution time: 1.2088s
Full Width at Full Minimum (FWFM) ROC:0.9998, precision @ rank n:0.9032, execution time: 2.5674s
Yeo-Johnson Transformation (YJ) ROC:0.9997, precision @ rank n:0.9355, execution time: 3.4552s
Kmeans Clustering (KMEANS) ROC:0.9996, precision @ rank n:0.9032, execution time: 1.226s
Z Score (ZSCORE) ROC:0.9997, precision @ rank n:0.9032, execution time: 1.2181s
AUC Percentage (AUCP) ROC:0.9

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.9997, precision @ rank n:0.9032, execution time: 1.266s
Generalized Extreme Studentized Deviate (GESD) ROC:0.9995, precision @ rank n:0.9032, execution time: 2.4761s
Modified Thompson Tau Test (MTT) ROC:0.9993, precision @ rank n:0.9355, execution time: 1.291s
Mean Shift Clustering (SHIFT) ROC:0.9993, precision @ rank n:0.871, execution time: 20.8922s

... Processing vertebral.mat ...
Inter-Quartile Region (IQR) ROC:0.4435, precision @ rank n:0.1667, execution time: 0.5063s
Median Absolute Deviation (MAD) ROC:0.3909, precision @ rank n:0.0833, execution time: 0.5141s
Full Width at Full Minimum (FWFM) ROC:0.3968, precision @ rank n:0.0833, execution time: 0.575s
Yeo-Johnson Transformation (YJ) ROC:0.4008, precision @ rank n:0.0, execution time: 0.7315s
Kmeans Clustering (KMEANS) ROC:0.4296, precision @ rank n:0.0833, execution time: 0.5213s
Z Score (ZSCORE) ROC:0.3978, precision @ rank n:0.0833, execution time: 0.514s
AUC Percentage (AUCP) ROC:0.4306, 

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.4296, precision @ rank n:0.0833, execution time: 0.5196s
Generalized Extreme Studentized Deviate (GESD) ROC:0.4335, precision @ rank n:0.0833, execution time: 0.5571s
Modified Thompson Tau Test (MTT) ROC:0.4375, precision @ rank n:0.0833, execution time: 0.5175s
Mean Shift Clustering (SHIFT) ROC:0.4722, precision @ rank n:0.0833, execution time: 1.0437s

... Processing vowels.mat ...
Inter-Quartile Region (IQR) ROC:0.8021, precision @ rank n:0.2632, execution time: 0.6379s
Median Absolute Deviation (MAD) ROC:0.7975, precision @ rank n:0.1053, execution time: 0.6433s
Full Width at Full Minimum (FWFM) ROC:0.819, precision @ rank n:0.2632, execution time: 0.7092s
Yeo-Johnson Transformation (YJ) ROC:0.7666, precision @ rank n:0.2105, execution time: 1.2754s
Kmeans Clustering (KMEANS) ROC:0.7926, precision @ rank n:0.1579, execution time: 0.6512s
Z Score (ZSCORE) ROC:0.7826, precision @ rank n:0.1579, execution time: 0.6431s
AUC Percentage (AUCP) ROC:0.782

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.8192, precision @ rank n:0.2105, execution time: 0.6484s
Generalized Extreme Studentized Deviate (GESD) ROC:0.7971, precision @ rank n:0.2105, execution time: 0.9073s
Modified Thompson Tau Test (MTT) ROC:0.7896, precision @ rank n:0.3158, execution time: 0.651s
Mean Shift Clustering (SHIFT) ROC:0.7884, precision @ rank n:0.0526, execution time: 7.0352s

... Processing wbc.mat ...
Inter-Quartile Region (IQR) ROC:0.927, precision @ rank n:0.4444, execution time: 0.5339s
Median Absolute Deviation (MAD) ROC:0.9207, precision @ rank n:0.4444, execution time: 0.5466s
Full Width at Full Minimum (FWFM) ROC:0.9215, precision @ rank n:0.5556, execution time: 0.5452s
Yeo-Johnson Transformation (YJ) ROC:0.9122, precision @ rank n:0.5556, execution time: 0.8526s
Kmeans Clustering (KMEANS) ROC:0.927, precision @ rank n:0.5556, execution time: 0.5459s
Z Score (ZSCORE) ROC:0.927, precision @ rank n:0.4444, execution time: 0.5402s
AUC Percentage (AUCP) ROC:0.9153, pre

  log_a = np.log(a)


Chauvenet's Criterion (CHAU) ROC:0.9402, precision @ rank n:0.5556, execution time: 0.5488s
Generalized Extreme Studentized Deviate (GESD) ROC:0.9301, precision @ rank n:0.5556, execution time: 0.6134s
Modified Thompson Tau Test (MTT) ROC:0.9347, precision @ rank n:0.5556, execution time: 0.5478s
Mean Shift Clustering (SHIFT) ROC:0.9153, precision @ rank n:0.5556, execution time: 1.2417s


In [3]:
# Show the measured execution times (in seconds) collected per dataset and
# thresholder; the bare expression renders the table as the cell output.
print('Time complexity')
time_df

Time complexity


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,IQR,MAD,FWFM,YJ,KMEANS,ZSCORE,...,EB,REGR,BOOT,MCST,HIST,MOLL,CHAU,GESD,MTT,SHIFT
0,arrhythmia,452,274,14.6018,0.7727,0.782,0.7227,1.0575,0.7203,0.7161,...,1.1599,0.7489,0.8757,0.7272,0.7105,0.7173,0.7216,0.7948,0.7226,1.8228
0,cardio,1831,21,9.6122,0.6833,0.6936,0.7983,1.3049,0.6973,0.6891,...,1.2716,0.8935,1.2427,0.8464,0.6889,0.6912,0.6911,1.0267,0.7158,6.5442
0,glass,214,9,4.2056,0.4993,0.5082,0.5114,0.7181,0.5148,0.5079,...,0.9928,0.5722,0.5754,0.5387,0.5367,0.5724,0.526,0.549,0.5109,0.8418
0,ionosphere,351,33,35.8974,0.5867,0.5797,0.5849,0.8081,0.5532,0.5471,...,0.9866,0.5768,0.6066,0.5592,0.5446,0.5526,0.5499,0.6161,0.5844,1.6737
0,letter,1600,32,6.25,0.737,0.7653,0.8262,1.2038,0.7156,0.7073,...,1.2361,0.8727,0.9919,0.8265,0.7808,0.7751,0.7989,0.9934,0.7085,12.5889
0,lympho,148,18,4.0541,0.5015,0.521,0.524,0.8475,0.5093,0.5048,...,0.8691,0.5142,0.5359,0.5091,0.5045,0.5065,0.509,0.5305,0.505,0.8564
0,mnist,7603,100,9.2069,2.156,2.0798,4.2423,5.2298,2.0935,2.0807,...,4.6261,3.7223,4.6721,4.5163,2.159,2.0917,2.0846,3.5197,2.1343,50.7764
0,musk,3062,166,3.1679,1.4499,1.4816,1.8642,2.9819,1.5326,1.5021,...,2.3041,1.8837,2.1508,1.8769,1.4602,1.4656,1.459,2.0173,1.4871,27.3599
0,optdigits,5216,64,2.8758,1.4293,1.4123,2.4325,3.3058,1.494,1.4843,...,2.8857,2.3425,2.7082,2.5929,1.4047,1.437,1.4128,2.4141,1.4327,42.5253
0,pendigits,6870,16,2.2707,1.1644,1.2056,2.8667,3.8452,1.1664,1.1558,...,3.335,2.544,3.6631,3.1533,1.1529,1.1735,1.1646,2.45,1.228,37.9711


In [4]:
# Show the ROC AUC values collected per dataset and thresholder; the bare
# expression renders the table as the cell output.
print('ROC Performance')
roc_df

ROC Performance


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,IQR,MAD,FWFM,YJ,KMEANS,ZSCORE,...,EB,REGR,BOOT,MCST,HIST,MOLL,CHAU,GESD,MTT,SHIFT
0,arrhythmia,452,274,14.6018,0.8275,0.8497,0.8494,0.8789,0.8156,0.8457,...,0.8373,0.8417,0.82,0.8193,0.8259,0.8242,0.8541,0.8494,0.8371,0.8529
0,cardio,1831,21,9.6122,0.9176,0.8889,0.9275,0.9215,0.9275,0.9274,...,0.915,0.9182,0.9342,0.932,0.931,0.9109,0.9092,0.9251,0.9191,0.9248
0,glass,214,9,4.2056,0.6469,0.6593,0.6593,0.6519,0.6519,0.6346,...,0.642,0.6642,0.6321,0.6173,0.6568,0.6617,0.6741,0.6642,0.684,0.6296
0,ionosphere,351,33,35.8974,0.786,0.7783,0.7707,0.7805,0.7817,0.7835,...,0.7648,0.7803,0.7757,0.7879,0.7826,0.7746,0.7787,0.7746,0.7897,0.7801
0,letter,1600,32,6.25,0.6872,0.5659,0.5883,0.604,0.5743,0.6082,...,0.5623,0.615,0.6265,0.5966,0.6455,0.5949,0.6181,0.6348,0.6063,0.6357
0,lympho,148,18,4.0541,1.0,0.9942,0.9942,0.9942,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,0.9942,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.8311,0.745,0.8056,0.8235,0.8007,0.8012,...,0.7855,0.8227,0.7889,0.7979,0.8313,0.8066,0.7793,0.7812,0.7938,0.7825
0,musk,3062,166,3.1679,0.9999,1.0,0.9995,0.9977,0.9997,0.9999,...,1.0,1.0,0.9999,1.0,0.9992,1.0,1.0,0.9996,1.0,1.0
0,optdigits,5216,64,2.8758,0.7275,0.7533,0.7027,0.6954,0.6977,0.6746,...,0.6975,0.7419,0.782,0.718,0.6776,0.7367,0.7554,0.7413,0.6703,0.7081
0,pendigits,6870,16,2.2707,0.9202,0.9359,0.9629,0.9463,0.9617,0.9483,...,0.9062,0.9463,0.9318,0.9408,0.9164,0.9274,0.9547,0.9434,0.9255,0.9579


In [5]:
# Show the precision @ rank n values collected per dataset and thresholder;
# the bare expression renders the table as the cell output.
print('Precision @ n Performance')
prn_df

Precision @ n Performance


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,IQR,MAD,FWFM,YJ,KMEANS,ZSCORE,...,EB,REGR,BOOT,MCST,HIST,MOLL,CHAU,GESD,MTT,SHIFT
0,arrhythmia,452,274,14.6018,0.5714,0.4643,0.6071,0.5714,0.5,0.5357,...,0.5714,0.5714,0.4643,0.5,0.5714,0.5357,0.5357,0.5357,0.5357,0.5
0,cardio,1831,21,9.6122,0.4571,0.4143,0.4571,0.4286,0.5286,0.5,...,0.5,0.5143,0.5429,0.5,0.5429,0.4714,0.4857,0.5143,0.4429,0.5143
0,glass,214,9,4.2056,0.2,0.2,0.2,0.2,0.2,0.2,...,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2
0,ionosphere,351,33,35.8974,0.5652,0.5435,0.5435,0.5652,0.5652,0.5435,...,0.5217,0.5652,0.5435,0.587,0.5,0.5435,0.5435,0.5217,0.587,0.5652
0,letter,1600,32,6.25,0.122,0.0732,0.0488,0.0732,0.0732,0.0732,...,0.0732,0.0732,0.0732,0.0488,0.0488,0.0732,0.0488,0.0976,0.0976,0.0732
0,lympho,148,18,4.0541,1.0,0.6667,0.6667,0.6667,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,0.6667,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.3481,0.1926,0.3037,0.3556,0.2926,0.3185,...,0.2407,0.3259,0.2815,0.3111,0.3852,0.3407,0.2704,0.2481,0.2963,0.2519
0,musk,3062,166,3.1679,0.9512,1.0,0.9512,0.8537,0.9268,0.9512,...,1.0,1.0,0.9756,1.0,0.9512,1.0,1.0,0.9268,0.9756,1.0
0,optdigits,5216,64,2.8758,0.0308,0.0615,0.0154,0.0,0.0154,0.0462,...,0.0308,0.0308,0.0154,0.0154,0.0154,0.0462,0.0462,0.0,0.0154,0.0154
0,pendigits,6870,16,2.2707,0.2097,0.2581,0.3065,0.2742,0.3065,0.2258,...,0.1774,0.2258,0.2581,0.2258,0.2419,0.2258,0.3548,0.2581,0.2097,0.3387
