In [1]:
# Notebook setup: imports, warning suppression, working directory, and the
# shared data-generator / utility objects used by the later cells.
import pandas as pd

import warnings
# ignore all warnings raised through the `warnings` module from here on
warnings.filterwarnings("ignore")

import os
# Move the working directory up one level before the adbench imports below.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel moves up one more level each time.
# Presumably this makes the `adbench` package resolvable from the parent
# folder — TODO confirm.
os.chdir('..')

# import the ADBench data generator and utility helpers
from adbench.datasets.data_generator import DataGenerator
from adbench.myutils import Utils

# shared instances, created with their default arguments
datagenerator = DataGenerator()
utils = Utils()

In [2]:
from adbench.baseline.PyOD import PYOD
from adbench.baseline.DevNet.run import DevNet
from adbench.baseline.Supervised import supervised
from adbench.baseline.REPEN.run import REPEN
from adbench.baseline.DevNet.run import DevNet
from adbench.baseline.PReNet.run import PReNet
from adbench.baseline.FEAWAD.run import FEAWAD
from adbench.baseline.DAGMM.run import DAGMM # Unsup
from adbench.baseline.DeepSAD.src.run import DeepSAD # Semi


# model_dict = {'DAGMM':DAGMM, 'COPOD': PYOD, 'ECOD': PYOD, 'XGBOD':PYOD, 'DeepSAD':DeepSAD, 'DevNet':DevNet, 'PReNet':PReNet, 'FEAWAD':FEAWAD}

# GANomaly DeepSAD REPEN DevNet PReNet FEAWAD XGBOD NB SVM MLP ResNet FTTransformer
# RF LGB XGB CatB

# benchmark datasets (these become the rows of the result tables)
dataset_list = [
    '1_ALOI',
    '6_cardio',
    '12_fault',
    '18_Ionosphere',
    '29_Pima',
]

# model name -> class instantiated for that model
# (these become the columns of the result tables, in this order)
model_dict = {
    'DeepSAD': DeepSAD,
    'XGBOD': PYOD,
    'CatB': supervised,
    'COPOD': PYOD,
    'ECOD': PYOD,
    'RF': supervised,
    'SVM': supervised,
    'IForest': PYOD,
    'PReNet': PReNet,
    'DAGMM': DAGMM,
}

# empty result tables, one per metric: datasets x models
df_AUCROC = pd.DataFrame(index=dataset_list, columns=list(model_dict))
df_AUCPR = df_AUCROC.copy()

In [3]:
# seed for reproducible results
seed = 42

# Settings passed to datagenerator.generator() for every dataset:
#   la: ratio of labeled anomalies, from 0.0 to 1.0
#   realistic_synthetic_mode: types of synthetic anomalies, can be local, global, dependency or cluster
#   noise_type: inject data noises for testing model robustness, can be duplicated_anomalies,
#               irrelevant_features or label_contamination
# Here: only 10% labeled anomalies are available, no synthetic anomalies, no injected noise.
generator_kwargs = dict(la=0.1, realistic_synthetic_mode=None, noise_type=None)

# Fit every model on every dataset and record AUC-ROC / AUC-PR on the test split.
for dataset in dataset_list:
    # import the dataset
    datagenerator.dataset = dataset  # specify the dataset name
    data = datagenerator.generator(**generator_kwargs)

    for name, model_cls in model_dict.items():
        # fresh model per (dataset, model) pair; the fitted model returned by
        # fit() is what gets scored below
        clf = model_cls(seed=seed, model_name=name)
        clf = clf.fit(X_train=data['X_train'], y_train=data['y_train'])

        # DAGMM is the one model whose predict_score is called with the training
        # data as well as the test data.
        # NOTE(review): presumably it needs the training set at scoring time —
        # confirm against the DAGMM wrapper.
        if name == "DAGMM":
            score = clf.predict_score(data['X_train'], data['X_test'])
        else:
            score = clf.predict_score(data['X_test'])

        # evaluation
        result = utils.metric(y_true=data['y_test'], y_score=score)

        # save results
        df_AUCROC.loc[dataset, name] = result['aucroc']
        df_AUCPR.loc[dataset, name] = result['aucpr']

subsampling for dataset 1_ALOI...
current noise type: None
{'Samples': 10000, 'Features': 27, 'Anomalies': 302, 'Anomalies Ratio(%)': 3.02}
best param: None
Learning rate set to 0.023648
0:	learn: 0.6279188	total: 173ms	remaining: 2m 52s
1:	learn: 0.5683493	total: 193ms	remaining: 1m 36s
2:	learn: 0.5148908	total: 207ms	remaining: 1m 8s
3:	learn: 0.4640346	total: 215ms	remaining: 53.6s
4:	learn: 0.4220544	total: 224ms	remaining: 44.5s
5:	learn: 0.3827438	total: 232ms	remaining: 38.4s
6:	learn: 0.3480773	total: 239ms	remaining: 33.9s
7:	learn: 0.3174367	total: 246ms	remaining: 30.5s
8:	learn: 0.2882729	total: 253ms	remaining: 27.9s
9:	learn: 0.2630682	total: 260ms	remaining: 25.7s
10:	learn: 0.2400025	total: 267ms	remaining: 24s
11:	learn: 0.2190697	total: 273ms	remaining: 22.4s
12:	learn: 0.2011648	total: 280ms	remaining: 21.2s
13:	learn: 0.1841713	total: 286ms	remaining: 20.2s
14:	learn: 0.1694869	total: 293ms	remaining: 19.3s
15:	learn: 0.1558192	total: 301ms	remaining: 18.5s
16:	lea

In [4]:
# AUC-ROC results table: one row per dataset, one column per model
df_AUCROC

Unnamed: 0,DeepSAD,XGBOD,CatB,COPOD,ECOD,RF,SVM,IForest,PReNet,DAGMM
1_ALOI,0.548098,0.721894,0.510126,0.475179,0.512396,0.548935,0.50469,0.496581,0.445911,0.512215
6_cardio,0.899852,0.963669,0.983827,0.928363,0.942827,0.910539,0.930071,0.944193,0.986941,0.704567
12_fault,0.752449,0.720317,0.773901,0.459889,0.481212,0.720297,0.64754,0.567306,0.736,0.570892
18_Ionosphere,0.944976,0.939091,0.940383,0.809378,0.750048,0.866124,0.88512,0.860335,0.809904,0.8411
29_Pima,0.643988,0.702629,0.771536,0.656143,0.592756,0.641349,0.594305,0.651639,0.757494,0.544768


In [5]:
# AUC-PR results table: one row per dataset, one column per model
df_AUCPR

Unnamed: 0,DeepSAD,XGBOD,CatB,COPOD,ECOD,RF,SVM,IForest,PReNet,DAGMM
1_ALOI,0.052038,0.067865,0.03411,0.030852,0.033768,0.038262,0.044376,0.031745,0.028944,0.035426
6_cardio,0.51458,0.794947,0.909086,0.604146,0.592825,0.776392,0.657865,0.615718,0.914535,0.20108
12_fault,0.618186,0.572156,0.670142,0.31601,0.334607,0.570844,0.520713,0.408845,0.639592,0.427181
18_Ionosphere,0.93691,0.912974,0.89672,0.709213,0.658163,0.836762,0.802022,0.818807,0.839171,0.722788
29_Pima,0.509887,0.586728,0.625059,0.541231,0.486831,0.523493,0.475128,0.517859,0.641025,0.398701
