In [22]:
import os
import warnings

import pandas as pd

# Silence all Python warnings for the rest of the session so the benchmark
# output below is not flooded by library warnings.
warnings.filterwarnings("ignore")

# Switch the working directory to the parent folder (presumably so that the
# `adbench` package imported below resolves -- confirm against the repo layout).
# The guard makes this cell idempotent: without it, every re-run of the cell
# in the same kernel would climb one more directory level.
if '_CWD_MOVED_TO_PARENT' not in globals():
    os.chdir('..')
    _CWD_MOVED_TO_PARENT = True

# import the necessary package
from adbench.datasets.data_generator import DataGenerator
from adbench.myutils import Utils

# Shared helpers used by the cells below: `datagenerator` produces the
# train/test splits, `utils` computes the evaluation metrics.
datagenerator = DataGenerator()
utils = Utils()

In [27]:
# Baseline wrappers. Each class is later instantiated as
# `cls(seed=..., model_name=...)`, so one wrapper class can serve several
# model names.
from adbench.baseline.PyOD import PYOD
from adbench.baseline.DevNet.run import DevNet
from adbench.baseline.Supervised import supervised
from adbench.baseline.REPEN.run import REPEN
from adbench.baseline.PReNet.run import PReNet
from adbench.baseline.FEAWAD.run import FEAWAD
from adbench.baseline.DAGMM.run import DAGMM # Unsup
from adbench.baseline.DeepSAD.src.run import DeepSAD # Semi

# Model names noted by the original author as further candidates
# (the wrapper class for each name is not shown in this notebook):
# GANomaly DeepSAD REPEN DevNet PReNet FEAWAD XGBOD NB SVM MLP ResNet FTTrans
# former RF LGB XGB CatB

# dataset and model list / dict
dataset_list = ['1_ALOI', '6_cardio', '25_musk', '18_Ionosphere', '29_Pima']

# model name -> wrapper class; the insertion order fixes the column order
# of the result tables.
model_dict = {
    'DeepSAD': DeepSAD,
    'XGBOD': PYOD,
    'CatB': supervised,
    'COPOD': PYOD,
    'ECOD': PYOD,
    'RF': supervised,
    'SVM': supervised,
    'IForest': PYOD,
    'PReNet': PReNet,
    'DAGMM': DAGMM,
}

# save the results: one row per dataset, one column per model.
# dtype=float keeps the tables numeric (NaN until filled) instead of the
# object dtype an empty DataFrame would otherwise get.
df_AUCROC = pd.DataFrame(index=dataset_list, columns=list(model_dict), dtype=float)
df_AUCPR = pd.DataFrame(index=dataset_list, columns=list(model_dict), dtype=float)

In [28]:
# seed for reproducible results
seed = 42

# Arguments of datagenerator.generator() used below (author's notes):
#   la: ratio of labeled anomalies, from 0.0 to 1.0
#   realistic_synthetic_mode: types of synthetic anomalies, can be local, global, dependency or cluster
#   noise_type: inject data noises for testing model robustness, can be duplicated_anomalies, irrelevant_features or label_contamination

for dataset in dataset_list:
    # import the dataset
    datagenerator.dataset = dataset  # specify the dataset name
    # only 10% labeled anomalies are available
    data = datagenerator.generator(la=0.1, realistic_synthetic_mode=None, noise_type=None)

    for name, model_cls in model_dict.items():
        # Build the model and fit it; the return value of fit() is what gets
        # used for scoring, exactly as in the original cell.
        clf = model_cls(seed=seed, model_name=name)
        clf = clf.fit(X_train=data['X_train'], y_train=data['y_train'])

        # DAGMM's predict_score is called with the training features in
        # addition to the test features; every other model only gets X_test.
        if name == "DAGMM":
            score = clf.predict_score(data['X_train'], data['X_test'])
        else:
            score = clf.predict_score(data['X_test'])

        # evaluation
        result = utils.metric(y_true=data['y_test'], y_score=score)

        # save results
        df_AUCROC.loc[dataset, name] = result['aucroc']
        df_AUCPR.loc[dataset, name] = result['aucpr']

current noise type: None
{'Samples': 5263, 'Features': 512, 'Anomalies': 263, 'Anomalies Ratio(%)': 5.0}
best param: None
Learning rate set to 0.017978
0:	learn: 0.6487284	total: 61.1ms	remaining: 1m 1s
1:	learn: 0.6065474	total: 113ms	remaining: 56.5s
2:	learn: 0.5659612	total: 165ms	remaining: 54.8s
3:	learn: 0.5308481	total: 215ms	remaining: 53.6s
4:	learn: 0.4962053	total: 266ms	remaining: 52.8s
5:	learn: 0.4652204	total: 318ms	remaining: 52.6s
6:	learn: 0.4345968	total: 371ms	remaining: 52.7s
7:	learn: 0.4075550	total: 421ms	remaining: 52.2s
8:	learn: 0.3817997	total: 468ms	remaining: 51.6s
9:	learn: 0.3575882	total: 515ms	remaining: 51s
10:	learn: 0.3354327	total: 562ms	remaining: 50.5s
11:	learn: 0.3157009	total: 611ms	remaining: 50.3s
12:	learn: 0.2957087	total: 657ms	remaining: 49.9s
13:	learn: 0.2780156	total: 704ms	remaining: 49.6s
14:	learn: 0.2609073	total: 752ms	remaining: 49.4s
15:	learn: 0.2457197	total: 799ms	remaining: 49.1s
16:	learn: 0.2314872	total: 844ms	remaining

In [29]:
# Display the AUC-ROC table (rows: datasets, columns: models).
# NOTE(review): the saved output below contains a CIFAR10_2 row and an
# "Unnamed: 0" header that the code above does not produce -- it looks stale;
# re-run the notebook to refresh it.
df_AUCROC

Unnamed: 0,DeepSAD,XGBOD,CatB,COPOD,ECOD,RF,SVM,IForest,PReNet,DAGMM
CIFAR10_2,0.628397,0.686093,0.635814,0.581165,0.599443,0.632793,0.24146,0.612152,0.700759,0.57016
1_ALOI,0.548098,0.721894,0.510126,0.475179,0.512396,0.548935,0.50469,0.496581,0.445911,0.512215
6_cardio,0.899852,0.963669,0.983827,0.928363,0.942827,0.910539,0.930071,0.944193,0.986941,0.704567
25_musk,0.96234,0.999923,1.0,0.95525,0.964549,0.930686,0.640643,1.0,1.0,0.772104
18_Ionosphere,0.944976,0.939091,0.940383,0.809378,0.750048,0.866124,0.88512,0.860335,0.809904,0.8411
29_Pima,0.643988,0.702629,0.771536,0.656143,0.592756,0.641349,0.594305,0.651639,0.757494,0.544768


In [30]:
# Display the AUC-PR table (rows: datasets, columns: models).
# NOTE(review): the saved output below contains a CIFAR10_2 row and an
# "Unnamed: 0" header that the code above does not produce -- it looks stale;
# re-run the notebook to refresh it.
df_AUCPR

Unnamed: 0,DeepSAD,XGBOD,CatB,COPOD,ECOD,RF,SVM,IForest,PReNet,DAGMM
CIFAR10_2,0.080546,0.173721,0.115081,0.062513,0.066072,0.095619,0.030506,0.069597,0.201825,0.074435
1_ALOI,0.052038,0.067865,0.03411,0.030852,0.033768,0.038262,0.044376,0.031745,0.028944,0.035426
6_cardio,0.51458,0.794947,0.909086,0.604146,0.592825,0.776392,0.657865,0.615718,0.914535,0.20108
25_musk,0.792046,0.997775,1.0,0.43955,0.582992,0.859032,0.341239,1.0,1.0,0.287882
18_Ionosphere,0.93691,0.912974,0.89672,0.709213,0.658163,0.836762,0.802022,0.818807,0.839171,0.722788
29_Pima,0.509887,0.586728,0.625059,0.541231,0.486831,0.523493,0.475128,0.517859,0.641025,0.398701
