本 notebook 对比当各个detector选取自己最大的disagreement点做阈值的时候，是否能够起到如下两个作用：
1. 提高f1
2. 甄别 detector

In [3]:
from __future__ import division,print_function
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import os
import sys
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
import re
from sklearn.preprocessing import RobustScaler
import scipy.stats as ss
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import scipy.io
import os
import sys
from time import time
import scipy.stats as ss
from sklearn.preprocessing import RobustScaler
from sklearn import metrics

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager
# Import all models
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.lscp import LSCP # ensemble
import multiprocessing as mp
from IPython.display import display
from sklearn.metrics import roc_auc_score
from thresholders import *


data_path = "/Users/kadima/experiment_any/anomaly-detection/datasets/"
def getData(fileName):
    # check fileName:
    files = [x for x in os.listdir(data_path) if x.endswith(".mat")]
    mat = scipy.io.loadmat(data_path+fileName)
    X = mat["X"]
    y = mat["y"]
    return X, y

def get_score_matrix(X, num_detectors):
    return np.zeros([X.shape[0], num_detectors])

def get_perform_matrix(num_thresholders, num_detectors):
    return np.zeros((num_thresholders, num_detectors))


random_state = np.random.RandomState(10)
outliers_fraction = 0.4
# initialize a set of detectors for LSCP
detector_list = [LOF(n_neighbors=5), LOF(n_neighbors=10), LOF(n_neighbors=15),
                 LOF(n_neighbors=20), LOF(n_neighbors=25), LOF(n_neighbors=30),
                 LOF(n_neighbors=35), LOF(n_neighbors=40), LOF(n_neighbors=45),
                 LOF(n_neighbors=50)]
classifiers = {
    'Angle-based Outlier Detector (ABOD)':
        ABOD(contamination=outliers_fraction),
#     'Cluster-based Local Outlier Factor (CBLOF)':
#         CBLOF(contamination=outliers_fraction,
#               check_estimator=False, random_state=random_state),
    'Feature Bagging':
        FeatureBagging(LOF(n_neighbors=35),
                       contamination=outliers_fraction,
                       random_state=random_state),
    'Histogram-base Outlier Detection (HBOS)': HBOS(
        contamination=outliers_fraction),
    'Isolation Forest': IForest(contamination=outliers_fraction,
                                random_state=random_state, n_estimators = 280),
    'K Nearest Neighbors (KNN)': KNN(
        contamination=outliers_fraction),
    'Average KNN': KNN(method='mean',
                       contamination=outliers_fraction),
    'Local Outlier Factor (LOF)':
        LOF(n_neighbors=35, contamination=outliers_fraction),
#     'Minimum Covariance Determinant (MCD)': MCD(
#         contamination=outliers_fraction, random_state=random_state),
    'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction)
#     'Principal Component Analysis (PCA)': PCA(
#         contamination=outliers_fraction, random_state=random_state),
#     'Locally Selective Combination (LSCP)': LSCP(
#         detector_list, contamination=outliers_fraction,
#         random_state=random_state)
}

names = []
# Show all detectors
for i, clf in enumerate(classifiers.keys()):
    names.append(clf)
#     print('Model', i + 1, clf)

# Fit the models with the generated data and 
# compare model performances
def get_result(X, y, classifiers):
    threshold_records = list()
    # create matrix to store the performance
    score_matrix = get_score_matrix(X, len(classifiers.keys()))
    perform_table = get_perform_matrix(6, len(classifiers.keys()) )

    np.random.seed(5)
    clfs = []

    # Fit the model
    for i, (clf_name, clf) in enumerate(classifiers.items()):
#         print(i + 1, 'fitting', clf_name)
        # fit the data and tag outliers
        clf.fit(X)
        clfs.append(clf)
        scores_pred = clf.decision_function(X)
        score_matrix[:, i] = scores_pred
        

    for i, thresholder in enumerate([sd_thresholder, mad_thresholder, 
                                     iqr_thresholder]):
        kk = []
        for j in range(score_matrix.shape[1]):
            _,perform_table[i,j],b = thresholder(score_matrix[:,j], y)
            kk.append(b)
            
        threshold_records.append(kk)
            
    for i in range(score_matrix.shape[1]):
        perform_table[-3, i] = f1_score(y, clfs[i].predict(X))
        perform_table[-2, i] = roc_auc_score(y, clf.predict_proba(X)[:, 1])

        
    perform_table[-1,:], d_threshold, disagreement_record = disagreement_one_scalar(score_matrix, len(classifiers), y, False)
    threshold_records.append(d_threshold)
    
    return (pd.DataFrame(perform_table, columns = names, 
            index = ["sd",'mad','iqr','default-f1','auc','disagreement']), 
            threshold_records, disagreement_record, score_matrix)

def run_helper(datasets):
    X,y = getData(datasets)
    X = X.astype(np.float64)
    return get_result(X, y, classifiers)


result_dict = dict()
bug_datasets = ["letter.mat","speech.mat","cardio.mat","http.mat", "smtp.mat",
               "ecoli.mat","shuttle.mat","seismic-bumps.arff","mammography.mat","cover.mat"]
for datasets in os.listdir("/Users/kadima/experiment_any/anomaly-detection/datasets/"):
    if datasets in bug_datasets:
        continue
    print(datasets)
    result_dict[datasets] = run_helper(datasets)


display(result_dict["lympho.mat"][0])


pima.mat
wine.mat
mnist.mat




arrhythmia.mat




pendigits.mat
musk.mat
thyroid.mat


  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


vowels.mat
optdigits.mat




breastw.mat


  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


satellite.mat
lympho.mat
annthyroid.mat


  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


ionosphere.mat
wbc.mat
glass.mat
satimage-2.mat
vertebral.mat




Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.666667,0.8,0.909091,0.6,0.6,0.666667,0.5,0.4
mad,0.0,0.077922,0.0,0.909091,0.077922,0.077922,0.077922,0.218182,0.545455
iqr,0.0,0.714286,0.857143,0.769231,0.5,0.533333,0.666667,0.5,0.545455
default-f1,0.166667,0.1875,0.184615,0.184615,0.342857,0.5,0.184615,0.184615,0.184615
auc,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777
disagreement,0.296296,0.285714,0.8,0.909091,0.47619,0.555556,0.285714,0.235294,0.444444


In [6]:
for datasets in os.listdir("/Users/kadima/experiment_any/anomaly-detection/datasets/"):
    if datasets in bug_datasets:
        continue
    else:
        print(datasets)
        print(np.sum(result_dict[datasets][2],axis=0))
        idx_worst = np.argmax(np.median(result_dict[datasets][2],axis=0))
        idx_best = np.argmin(np.median(result_dict[datasets][2],axis=0))

#         print(names[idx_worst],'is worst')
#         print(names[idx_best], 'is best')
        display(result_dict[datasets][0])
        display(pd.DataFrame(np.asarray(result_dict[datasets][1]).reshape(-1,9),
                             columns=result_dict[datasets][0].columns, 
                             index = ['sd', 'mad', 'iqr','disagreement']))
        print("=="*55)
        print("\n\n")


pima.mat
525.4917015719627


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.06338,0.095563,0.070671,0.036101,0.043321,0.05614,0.115254,0.0
mad,0.0,0.517375,0.517375,0.057143,0.465649,0.472441,0.517375,0.364066,0.0
iqr,0.0,0.158824,0.126667,0.057143,0.15534,0.118421,0.171254,0.234957,0.0
default-f1,0.481802,0.422383,0.528696,0.566957,0.475836,0.379391,0.441652,0.553043,0.42087
auc,0.539302,0.539302,0.539302,0.539302,0.539302,0.539302,0.539302,0.539302,0.539302
disagreement,0.014815,0.014815,0.53202,0.022059,0.007435,0.007435,0.007435,0.007435,0.500566


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,3.898839e-05,1.609722,23.108828,0.11835,57.78039,38.1234,1.612252,59.229399,0.000733
mad,8.458347e-06,0.212685,2.817009,0.125791,21.504689,15.494152,0.240251,19.780183,0.001121
iqr,1.200092e-05,1.276634,22.778228,0.124183,38.740957,28.037795,1.293413,32.865689,0.001047
disagreement,-1.441992e-10,2.914823,20.243166,0.159419,270.98041,161.9942,3.231154,153.184688,-0.000239





wine.mat
107.84306065141558


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.666667,0.0,0.0,0.666667,0.75,0.666667,0.588235,0.0
mad,0.0,0.143885,0.143885,0.0,0.425532,0.4,0.143885,0.4,0.095238
iqr,0.0,0.645161,0.0,0.0,0.833333,0.769231,0.645161,0.636364,0.105263
default-f1,0.333333,0.333333,0.258065,0.258065,0.392157,0.526316,0.333333,0.322581,0.096774
auc,0.47521,0.47521,0.47521,0.47521,0.47521,0.47521,0.47521,0.47521,0.47521
disagreement,0.5,0.25974,0.0,0.0,0.5,0.588235,0.15748,0.166667,0.114943


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,6.276764e-05,2.643409,20.949649,0.097284,106.665932,55.017989,2.743007,106.429878,0.000518
mad,4.385684e-05,0.190203,6.542906,0.108296,23.030922,13.287362,0.128663,24.14599,0.000338
iqr,3.114666e-05,1.482144,21.290711,0.091433,47.369782,25.705406,1.494692,70.197373,0.000365
disagreement,-3.217208e-07,1.022233,23.932535,0.155682,27.38646,18.669079,0.973545,142.067452,-5.3e-05





mnist.mat
6612271.553298203


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.184035,0.042714,0.101611,0.173703,0.162031,0.184211,0.018824,0.168614
mad,0.0,0.168614,0.168614,0.040984,0.169965,0.172235,0.168614,0.373325,0.168614
iqr,0.0,0.244618,0.036223,0.081152,0.161567,0.146402,0.229249,0.38307,0.168614
default-f1,0.244344,0.257261,0.243785,0.305266,0.303893,0.367898,0.248083,0.337878,0.0
auc,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
disagreement,0.23323,0.211891,0.239636,0.340107,0.252256,0.242386,0.205614,0.002829,0.168614


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,1.191458e-11,1.259708,301.461587,0.08641,628.11368,480.180672,1.240016,83171850.0,0.0
mad,5.219713e-12,0.179424,3.186567,0.1096,250.249669,199.137178,0.168531,212.2028,0.0
iqr,4.197097e-12,1.234101,301.599866,0.094494,632.976854,486.327714,1.216737,2681.237,0.0
disagreement,-1.614525e-12,1.032279,298.675732,0.014402,393.12212,295.683733,1.023884,284501000.0,-0.0





arrhythmia.mat
627865.4780454746


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.292683,0.195122,0.246914,0.253165,0.207792,0.135135,0.136986,0.0
mad,0.0,0.254826,0.254826,0.40404,0.270833,0.272527,0.254826,0.373444,0.253968
iqr,0.0,0.350515,0.232558,0.395833,0.336842,0.322581,0.350515,0.478873,0.0
default-f1,0.348485,0.432653,0.42915,0.42915,0.439834,0.453901,0.436214,0.364372,0.0
auc,0.467891,0.467891,0.467891,0.467891,0.467891,0.467891,0.467891,0.467891,0.467891
disagreement,0.0,0.135135,0.056338,0.111111,0.184211,0.136986,0.135135,0.340909,0.254826


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,3.231405e-10,1.836089,246.785909,0.106933,299.057446,221.132637,1.932612,96837.976254,2.754063e-09
mad,2.63044e-10,0.328728,59.414008,0.074106,101.135326,80.571861,0.329692,325.627447,0.0
iqr,2.092516e-10,1.532234,238.87126,0.07796,237.401494,182.151126,1.563965,3415.460823,1.354035e-10
disagreement,-3.111202e-13,2.000852,268.379331,0.15097,340.433283,261.843586,2.003275,25363.755046,-1.5006e-08





pendigits.mat
4237.649488925767


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.049315,0.329749,0.378738,0.075,0.067485,0.055249,0.014151,0.273292
mad,0.0,0.044406,0.0,0.35206,0.067157,0.065669,0.044406,0.137566,0.31746
iqr,0.0,0.051118,0.333333,0.377104,0.095,0.088235,0.045662,0.039301,0.311966
default-f1,0.063262,0.038503,0.106749,0.107438,0.078978,0.080851,0.039384,0.107438,0.107438
auc,0.935419,0.935419,0.935419,0.935419,0.935419,0.935419,0.935419,0.935419,0.935419
disagreement,0.044571,0.050609,0.129032,0.034884,0.050808,0.045667,0.047348,0.016949,0.047059


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,311.80462,1.369033,2.242112,0.099869,0.522285,0.375395,1.38008,221.072323,155.622547
mad,118.411416,0.187195,13.165767,0.109562,0.234205,0.165562,0.185224,49.160122,134.813397
iqr,103.019777,1.255896,2.153678,0.101501,0.489459,0.349111,1.254587,156.646199,131.466936
disagreement,-1076.374919,1.173092,7.809122,0.150673,0.182117,0.104668,1.17456,198.799024,249.623277





musk.mat
149067441.80827743


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.020619,0.892473,1.0,0.013514,0.016667,0.020305,0.594203,0.0
mad,0.0,0.061412,0.061412,0.934066,0.061412,0.03655,0.061412,0.128647,0.103761
iqr,0.0,0.022727,0.905473,1.0,0.0,0.017094,0.023077,0.705455,0.081855
default-f1,0.007491,0.04375,0.146747,0.146747,0.042989,0.010101,0.043512,0.146747,0.093868
auc,0.572822,0.572822,0.572822,0.572822,0.572822,0.572822,0.572822,0.572822,0.572822
disagreement,0.061412,0.058315,0.355932,0.962567,0.036866,0.04345,0.059588,0.916201,0.05972


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,3.334232e-05,1.114383,536.565584,0.063175,617.03542,450.480701,1.11695,67978.941143,0.000687
mad,7.494205e-12,0.087943,3.516014,0.076037,208.25127,163.526302,0.089887,664.161018,0.000246
iqr,4.419785e-12,1.10072,536.156103,0.062836,629.591654,455.538274,1.102385,13438.225196,0.000364
disagreement,-0.0005102837,0.979881,538.523343,0.073027,364.882173,147.298051,0.974501,32467.576652,-0.000384





thyroid.mat
nan


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.0,0.49789,0.563025,0.223529,0.218391,0.232804,0.288288,0.1375
mad,0.0,0.048124,0.021277,0.459459,0.135015,0.145136,0.048124,0.192149,0.161943
iqr,0.0,0.276316,0.42515,0.459459,0.357309,0.340741,0.244318,0.283019,0.169643
default-f1,0.0,0.121331,0.114178,0.116105,0.128169,0.181263,0.118029,0.116105,0.106117
auc,0.843728,0.843728,0.843728,0.843728,0.843728,0.843728,0.843728,0.843728,0.843728
disagreement,0.246628,0.0,0.056655,0.05075,0.049232,0.050516,0.048704,0.051681,0.059349


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,,1887924.0,-2.193003,0.168693,0.150542,0.102504,1.65911,16606.323006,60.786809
mad,,0.2420732,7.300475,0.114789,0.046078,0.032952,0.297262,22.489851,40.665942
iqr,,1.345631,-4.531328,0.115001,0.087642,0.06176,1.412395,46.232761,43.769728
disagreement,-4756.197106,7245102.0,-14.313408,-0.049913,0.010667,0.008481,0.972222,1.384877,-9.124047





vowels.mat
954.5834676501795


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.3,0.0625,0.112676,0.516129,0.62069,0.381818,0.022727,0.246575
mad,0.0,0.066401,0.066401,0.113208,0.084746,0.0815,0.066401,0.078704,0.166667
iqr,0.0,0.372093,0.0,0.129032,0.571429,0.623656,0.353591,0.024096,0.25
default-f1,0.168675,0.163132,0.098101,0.126582,0.19685,0.494253,0.163132,0.116904,0.123418
auc,0.778336,0.778336,0.778336,0.778336,0.778336,0.778336,0.778336,0.778336,0.778336
disagreement,0.066401,0.069252,0.070028,0.083904,0.067889,0.066578,0.070822,0.071276,0.090909


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,3.207885,1.337367,22.260395,0.073847,2.319503,1.543594,1.356429,126.136528,61.511949
mad,0.419137,0.179155,6.138666,0.094352,1.041342,0.676642,0.181062,55.56827,68.029485
iqr,0.381311,1.238609,23.071796,0.080765,2.226631,1.479408,1.24207,129.851724,63.137615
disagreement,-25.567397,0.975127,14.714639,-0.029552,0.793143,0.40574,0.975847,5.781895,-9.261919





optdigits.mat
19869.46443862503


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.112676,0.077348,0.007843,0.043478,0.017094,0.097561,0.0,0.0
mad,0.0,0.055908,0.055908,0.01,0.056001,0.054266,0.055908,0.008674,0.0
iqr,0.0,0.107784,0.045977,0.008,0.043478,0.017544,0.097143,0.0,0.0
default-f1,0.065827,0.067003,0.12254,0.100179,0.045929,0.012739,0.069885,0.025034,0.055456
auc,0.503669,0.503669,0.503669,0.503669,0.503669,0.503669,0.503669,0.503669,0.503669
disagreement,0.055619,0.059305,0.069691,0.046243,0.052138,0.0496,0.054445,0.0,0.057256


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,1.896956e-06,1.207941,104.632771,0.067921,26.927032,20.240315,1.209276,18378.465125,0.00605
mad,1.139349e-06,0.147875,15.983631,0.079216,9.687946,7.495566,0.142302,92.361639,0.000499
iqr,7.719795e-07,1.188656,105.055236,0.069294,26.957877,20.375743,1.185125,198.466578,0.000486
disagreement,-2.788963e-06,1.025347,86.369253,0.043127,14.491377,10.409582,1.009929,99660.375433,-9.4e-05





breastw.mat
nan


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.0,0.0,0.0,0.040984,0.024793,0.015686,0.196226,0.0
mad,0.0,0.518438,0.912206,0.0,0.319444,0.228782,0.518438,0.905303,0.0
iqr,0.0,0.0,0.0,0.0,0.0,0.0,0.08254,0.253623,0.0
default-f1,0.0,0.090551,0.925781,0.929688,0.93254,0.89172,0.25098,0.929688,0.921875
auc,0.978693,0.978693,0.978693,0.978693,0.978693,0.978693,0.978693,0.978693,0.978693
disagreement,0.0,0.513939,0.561692,0.643338,0.518438,0.518438,0.501726,0.669468,0.685796


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,,2490202000.0,28.432986,0.219683,8.693649,6.275332,2.482509,9582.671141,22.387171
mad,,0.4765935,16.747104,0.315274,6.290139,4.946379,0.285846,121.58117,17.468753
iqr,,285714300.0,37.765205,0.361561,11.989579,8.269521,1.718182,5979.586155,35.880585
disagreement,,0.9947452,5.885972,-0.121508,0.0,0.0,0.980717,4.159465,-1.57448





satellite.mat
5777.508700224842


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.089405,0.192035,0.166294,0.141641,0.13022,0.097778,0.259685,0.038871
mad,0.0,0.480699,0.480699,0.44553,0.505898,0.495766,0.480699,0.689447,0.125242
iqr,0.0,0.144061,0.443024,0.408951,0.207101,0.202521,0.146218,0.595829,0.074894
default-f1,0.404733,0.408678,0.577007,0.568764,0.484753,0.428301,0.403536,0.645987,0.340564
auc,0.48731,0.48731,0.48731,0.48731,0.48731,0.48731,0.48731,0.48731,0.48731
disagreement,0.00878,0.368105,0.572622,0.263848,0.014606,0.007824,0.390267,0.000982,0.450473


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,2.537253e-07,1.469135,115.616407,0.127665,48.189977,35.132628,1.476722,1014.564097,0.00049
mad,2.806045e-07,0.280117,2.915573,0.091906,19.419862,13.80806,0.278843,65.379975,0.000202
iqr,1.749335e-07,1.395262,114.346316,0.096633,43.101765,31.55853,1.394121,157.524244,0.000389
disagreement,-9.217186e-10,1.16421,112.880053,0.113185,71.119618,53.931143,1.119128,2390.057761,-0.0002





lympho.mat
74.85559565559667


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.666667,0.8,0.909091,0.6,0.6,0.666667,0.5,0.4
mad,0.0,0.077922,0.0,0.909091,0.077922,0.077922,0.077922,0.218182,0.545455
iqr,0.0,0.714286,0.857143,0.769231,0.5,0.533333,0.666667,0.5,0.545455
default-f1,0.166667,0.1875,0.184615,0.184615,0.342857,0.5,0.184615,0.184615,0.184615
auc,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777,0.9777
disagreement,0.296296,0.285714,0.8,0.909091,0.47619,0.555556,0.285714,0.235294,0.444444


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.006399,1.218966,-10.533459,0.125367,3.669084,2.696982,1.21808,47.656101,7.909586
mad,0.004032,0.112745,11.054582,0.110841,0.949257,0.779551,0.08698,18.908183,7.561092
iqr,0.003421,1.163317,-12.414658,0.098495,3.260276,2.429143,1.142611,42.826684,6.747274
disagreement,-0.000584,1.344739,-9.720204,0.127569,3.162278,2.251504,1.358477,20.117246,9.317072





annthyroid.mat
nan


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.003077,0.23057,0.29,0.141141,0.140299,0.189266,0.112478,0.085672
mad,0.0,0.138092,0.0,0.314453,0.243113,0.270293,0.138092,0.415833,0.093252
iqr,0.0,0.244262,0.258333,0.314066,0.293176,0.297723,0.299475,0.442467,0.086842
default-f1,0.0,0.279767,0.160893,0.253661,0.241915,0.278412,0.222903,0.31283,0.135911
auc,0.546726,0.546726,0.546726,0.546726,0.546726,0.546726,0.546726,0.546726,0.546726
disagreement,0.28833,0.310317,0.141026,0.208723,0.291931,0.29497,0.309153,0.242424,0.141844


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,,1141267.0,-10.1728,0.166302,0.05494,0.037141,1.666615,13368.499959,65.158383
mad,,0.2137703,6.984987,0.119473,0.014505,0.011141,0.256056,22.575555,55.053515
iqr,,1.315296,-12.504815,0.117284,0.02749,0.019581,1.333828,45.562463,58.372769
disagreement,-474287.35049,1.129626,-5.87536,0.230922,0.02939,0.019101,1.241332,2132.760259,-1.932858





ionosphere.mat
303.61447942619805


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.160584,0.0,0.015748,0.090909,0.061538,0.160584,0.25,0.105263
mad,0.0,0.528302,0.0,0.0,0.813084,0.807512,0.528302,0.844106,0.2
iqr,0.0,0.25,0.0,0.0,0.25,0.273973,0.273973,0.56,0.225352
default-f1,0.808824,0.722433,0.518797,0.676692,0.837736,0.882353,0.722433,0.838951,0.62406
auc,0.762116,0.762116,0.762116,0.762116,0.762116,0.762116,0.762116,0.762116,0.762116
disagreement,0.528302,0.54902,0.530526,0.553846,0.529412,0.531646,0.544276,0.531646,0.615873


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,41.957608,3.257485,49.304671,0.194795,4.988323,3.89542,3.414739,8923.718487,44.6963
mad,0.216793,0.8681,57.899196,0.258079,2.226241,1.703185,0.903405,314.70733,39.002284
iqr,1.430057,2.777351,60.374703,0.213668,4.544217,3.498421,2.889523,4442.131834,38.153848
disagreement,-216.545792,0.983547,-28.914301,-0.09971,0.193972,0.144186,0.983007,3.151805,-0.56808





wbc.mat
171.7431074806446


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.457143,0.611111,0.555556,0.432432,0.388889,0.470588,0.344828,0.413793
mad,0.0,0.105263,0.173913,0.518519,0.124629,0.132075,0.105263,0.28777,0.491803
iqr,0.0,0.491228,0.612245,0.518519,0.54902,0.528302,0.491228,0.383562,0.535714
default-f1,0.222222,0.251497,0.244186,0.244186,0.256098,0.391304,0.254545,0.244186,0.232558
auc,0.938242,0.938242,0.938242,0.938242,0.938242,0.938242,0.938242,0.938242,0.938242
disagreement,0.16,0.357143,0.625,0.37037,0.086957,0.086957,0.344828,0.387097,0.166667


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,25.25723,1.757174,-5.566513,0.160262,0.791464,0.574255,1.743035,3473.948255,8.68793
mad,19.775763,0.297367,33.355934,0.095124,0.250513,0.188711,0.31249,112.411913,2.633241
iqr,19.581838,1.458776,-17.757732,0.101885,0.62042,0.446922,1.459554,276.077308,2.924901
disagreement,-0.005739,2.010306,-2.805404,0.232212,1.205545,0.924275,1.952617,2465.535396,18.537171





glass.mat
200.92848963822186


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.0,0.0,0.125,0.117647,0.117647,0.133333,0.0,0.105263
mad,0.0,0.080717,0.0,0.111111,0.258065,0.222222,0.080717,0.202247,0.148148
iqr,0.0,0.228571,0.0,0.125,0.222222,0.227273,0.181818,0.040816,0.083333
default-f1,0.193548,0.130435,0.084211,0.105263,0.206897,0.235294,0.131868,0.189474,0.105263
auc,0.593496,0.593496,0.593496,0.593496,0.593496,0.593496,0.593496,0.593496,0.593496
disagreement,0.181818,0.3,0.086957,0.125,0.095238,0.111111,0.255319,0.0,0.095238


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,135.29737,2.795689,6.647519,0.190016,3.333042,2.212136,3.093281,7125.607455,33.08249
mad,29.490408,0.629727,15.007944,0.178628,0.838384,0.43054,0.675138,35.469169,12.414798
iqr,34.417478,1.797607,7.558638,0.188971,1.584989,0.987213,1.817078,485.683631,29.18769
disagreement,-2.5e-05,1.713176,2.376251,0.190864,2.844258,2.207048,1.504148,19065.495843,32.469447





satimage-2.mat
4208.855669382159


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.074627,0.47619,0.589286,0.292035,0.270742,0.07971,0.611354,0.0
mad,0.0,0.024174,0.024174,0.409496,0.03164,0.027434,0.024174,0.077216,0.0
iqr,0.0,0.06015,0.29902,0.350254,0.203297,0.185792,0.061758,0.194787,0.0
default-f1,0.041651,0.020539,0.058528,0.059365,0.06312,0.111226,0.021432,0.059365,0.020067
auc,0.420907,0.420907,0.420907,0.420907,0.420907,0.420907,0.420907,0.420907,0.420907
disagreement,0.090476,0.03907,0.027778,0.027778,0.233766,0.189055,0.037234,0.40404,0.022468


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,2.482073e-07,1.481917,114.278031,0.110183,46.146462,33.522359,1.48532,483.106733,0.0005
mad,2.813051e-07,0.272503,2.910328,0.085146,18.662439,13.187549,0.268408,54.5633,0.000448
iqr,1.756973e-07,1.389678,113.270786,0.077847,40.879043,29.914119,1.379072,111.582013,0.000523
disagreement,-2.443549e-08,1.219851,119.127767,0.300472,42.544095,29.379102,1.209861,1271.701557,-0.000365





vertebral.mat
150.86201952862595


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mad,0.0,0.222222,0.222222,0.0,0.114286,0.079365,0.222222,0.027397,0.0
iqr,0.0,0.04,0.0,0.0,0.0,0.0,0.041667,0.0,0.0
default-f1,0.125984,0.183333,0.111111,0.111111,0.088496,0.025316,0.177419,0.126984,0.269841
auc,0.625794,0.625794,0.625794,0.625794,0.625794,0.625794,0.625794,0.625794,0.625794
disagreement,0.0,0.0,0.039216,0.0,0.0,0.0,0.0,0.0,0.264706


Unnamed: 0,Angle-based Outlier Detector (ABOD),Feature Bagging,Histogram-base Outlier Detection (HBOS),Isolation Forest,K Nearest Neighbors (KNN),Average KNN,Local Outlier Factor (LOF),Minimum Covariance Determinant (MCD),One-class SVM (OCSVM)
sd,3.971734e-05,2.152736,19.228757,0.143777,71.473083,53.479981,2.241742,138.602298,0.000629
mad,1.173622e-05,0.235108,0.906859,0.102229,16.77562,12.55672,0.228372,16.113625,0.000608
iqr,1.605367e-05,1.300725,19.236822,0.117639,34.385352,24.83845,1.29226,22.683827,0.000501
disagreement,-1.188478e-13,7.026778,18.94898,0.375282,318.116255,245.87457,7.624551,742.927464,0.000124







表现好的