# Run ADBench 
- Here we provide a demo for testing AD algorithms on the datasets proposed in ADBench.
- Feel free to evaluate any customized algorithm in ADBench.
- To reproduce the complete experimental results in ADBench, please run the code in the run.py file.

In [3]:
# Basic packages: os is used to list the dataset folder, pandas to hold the
# result tables.
import os
import pandas as pd

# Silence every warning for the rest of the session so the demo output stays
# readable. NOTE(review): this is a blanket filter and hides all warning
# categories; narrow or remove it when debugging.
import warnings
warnings.filterwarnings("ignore")

# ADBench helpers (presumably project-local modules; not part of the standard library)
from data_generator import DataGenerator
from myutils import Utils

datagenerator = DataGenerator() # produces the train/test data dict for the dataset selected via its .dataset attribute
utils = Utils() # provides metric(), used to evaluate the predicted anomaly scores

- All ADBench datasets are included in the "datasets" folder, with filenames of the form "number_data_class.npz". Please see the table in the markdown for details.
    - To select a dataset in the data generator, use its filename without the ".npz" suffix, e.g., "10_cover" for "10_cover.npz".
    
    
- All the algorithms included in ADBench are listed in the table in the markdown.
    - You need to specify the model name at initialization, because some algorithms (e.g., supervised algorithms) are integrated into a single class; please see the table in the markdown for details.
    - You can also test your own AD algorithms on our generated datasets, as long as the algorithm can output anomaly scores for evaluation.

In [4]:
# List the dataset files in the "Classical" folder. os.listdir() returns
# entries in arbitrary (filesystem-dependent) order, so sort them to get a
# stable, reproducible listing.
sorted(os.listdir('datasets/Classical'))

['10_cover.npz',
 '11_donors.npz',
 '12_fault.npz',
 '13_fraud.npz',
 '14_glass.npz',
 '15_Hepatitis.npz',
 '16_http.npz',
 '17_InternetAds.npz',
 '18_Ionosphere.npz',
 '19_landsat.npz',
 '1_ALOI.npz',
 '20_letter.npz',
 '21_Lymphography.npz',
 '22_magic.gamma.npz',
 '23_mammography.npz',
 '24_mnist.npz',
 '25_musk.npz',
 '26_optdigits.npz',
 '27_PageBlocks.npz',
 '28_pendigits.npz',
 '29_Pima.npz',
 '2_annthyroid.npz',
 '30_satellite.npz',
 '31_satimage-2.npz',
 '32_shuttle.npz',
 '33_skin.npz',
 '34_smtp.npz',
 '35_SpamBase.npz',
 '36_speech.npz',
 '37_Stamps.npz',
 '38_thyroid.npz',
 '39_vertebral.npz',
 '3_backdoor.npz',
 '40_vowels.npz',
 '41_Waveform.npz',
 '42_WBC.npz',
 '43_WDBC.npz',
 '44_Wilt.npz',
 '45_wine.npz',
 '46_WPBC.npz',
 '47_yeast.npz',
 '4_breastw.npz',
 '5_campaign.npz',
 '6_cardio.npz',
 '7_Cardiotocography.npz',
 '8_celeba.npz',
 '9_census.npz']

In [5]:
from baseline.PyOD import PYOD
from baseline.DevNet.run import DevNet
from baseline.Supervised import supervised

# Datasets evaluated in this demo, named as the data generator expects them
# (the ".npz" filename without its suffix).
dataset_list = [
    '6_cardio',
    '25_musk',
    '26_optdigits',
    '36_speech',
    '40_vowels',
]

# Model name -> class that implements it. Several names map to the same class
# because the class selects the concrete algorithm from `model_name`.
model_dict = {
    'IForest': PYOD,
    'DeepSVDD': PYOD,
    'DevNet': DevNet,
    'RF': supervised,
    'CatB': supervised,
}

# Empty result tables, one per metric: a row for every dataset and a column
# for every model; the cells are filled in by the evaluation loop.
df_AUCROC = pd.DataFrame(index=dataset_list, columns=list(model_dict))
df_AUCPR = pd.DataFrame(index=dataset_list, columns=list(model_dict))

In [None]:
# Seed passed to every model so the results are reproducible.
seed = 42

# Evaluate every model in `model_dict` on every dataset in `dataset_list` and
# record AUC-ROC / AUC-PR in the two result tables.
#
# Options of datagenerator.generator():
#   la: ratio of labeled anomalies, from 0.0 to 1.0
#   realistic_synthetic_mode: types of synthetic anomalies, can be local,
#       global, dependency or cluster
#   noise_type: inject data noises for testing model robustness, can be
#       duplicated_anomalies, irrelevant_features or label_contamination
for dataset in dataset_list:
    # Select the dataset by name, then generate the train/test split;
    # la=0.1 means only 10% labeled anomalies are available.
    datagenerator.dataset = dataset
    data = datagenerator.generator(la=0.1, realistic_synthetic_mode=None, noise_type=None)

    for name, model_cls in model_dict.items():
        # Model initialization. The class is kept under its own name so the
        # loop variable is not rebound from a class to an instance.
        clf = model_cls(seed=seed, model_name=name)

        # Training; for unsupervised models the y label will be discarded.
        clf = clf.fit(X_train=data['X_train'], y_train=data['y_train'])

        # Predicted anomaly scores on the testing set.
        score = clf.predict_score(data['X_test'])

        # Evaluation against the ground-truth test labels.
        result = utils.metric(y_true=data['y_test'], y_score=score)

        # Store both metrics in the cell for this (dataset, model) pair.
        df_AUCROC.loc[dataset, name] = result['aucroc']
        df_AUCPR.loc[dataset, name] = result['aucpr']

current noise type: None
{'Samples': 1831, 'Features': 21, 'Anomalies': 176, 'Anomalies Ratio(%)': 9.61}
best param: None
best param: None
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 21)]              0         
                                                                 
 dense_1 (Dense)             (None, 64)                1344      
                                                                 
 net_output (D

In [None]:
# Display the AUC-ROC table: one row per dataset, one column per model.
df_AUCROC

In [None]:
# Display the AUC-PR table: one row per dataset, one column per model.
df_AUCPR