# Experimental Test Code:
- Code example for testing Anomaly Detection algorithms on Smart-Manufacturing datasets.
- Please refer to the [ADBench](https://github.com/Minqi824/ADBench) package to use additional algorithms or non-manufacturing datasets.

In [None]:
# import basic package
import os
import pandas as pd
import numpy as np
import time

import warnings
warnings.filterwarnings("ignore")

# import the necessary package
from data_generator import DataGenerator
from myutils import Utils

# Instantiate the data generator and utility objects used by the cells below:
# `datagenerator` loads each dataset, `utils` computes the evaluation metrics.
datagenerator = DataGenerator() 
utils = Utils()

- All the Smart-Manufacturing datasets are included in the "datasets" folder, with filenames of the form "number_data.npz". Please see the table in the markdown for details. To select a dataset in the data generator, give its filename without the ".npz" suffix, e.g., "88_GenesisPickPlace.npz" becomes "88_GenesisPickPlace".
    
    
- All the algorithms included are explained in detail in the [ADBench](https://github.com/Minqi824/ADBench) resource.
    - You need to specify the model name at initialization, because some algorithms (e.g., the supervised algorithms) are integrated in a single class; please see [ADBench](https://github.com/Minqi824/ADBench) for details.
    - You can also test your own AD algorithms on the proposed datasets, as long as the algorithm can output an anomaly score for evaluation.

In [None]:
# List the entries of the 'datasets/Classical' directory.
os.listdir('datasets/Classical')

In [None]:
# import the AD algorithms (or classes of algorithms) to be used
from baseline.PyOD import PYOD
from baseline.DevNet.run import DevNet
from baseline.Supervised import supervised
from baseline.LSTMOD.LSTMOD import LSTMOutlierDetector
from baseline.GANomaly.run import GANomaly
from pyod.models.vae import VAE

# Names of the datasets to benchmark; append any other dataset you want to test.
dataset_list = [
    '133_HighStorageSystem_anreal',
    '135_GenesisPickPlace_anreal',
]

# Model name -> class used to build that model. Insertion order is kept because
# it determines the column order of the result tables below.
model_dict = {
    # Classical
    **dict.fromkeys(['CBLOF', 'OCSVM', 'HBOS', 'KNN', 'LOF', 'PCA', 'IForest'], PYOD),
    # Deep
    **dict.fromkeys(['DeepSVDD', 'AutoEncoder', 'VAE'], PYOD),
    'LSTMOutlierDetector': LSTMOutlierDetector,
    'DevNet': DevNet,
    'GANomaly': GANomaly,
    # Supervised
    'XGBOD': PYOD,
    **dict.fromkeys(['RF', 'CatB'], supervised),
}

# Four independent, initially empty result tables (rows: datasets, columns:
# models) for AUC-ROC, AUC-PR, training time and inference time.
df_AUCROC, df_AUCPR, df_TIMETRAIN, df_TIMEINFER = (
    pd.DataFrame(data=None, index=dataset_list, columns=model_dict.keys())
    for _ in range(4)
)

In [None]:
# Seed passed to every model wrapper for reproducible results.
seed = 42

# Benchmark every model on every dataset and store AUC-ROC, AUC-PR, training
# time and inference time in the result tables. A model that fails at any stage
# is reported and recorded as NaN; it never inherits values from another model.
for dataset in dataset_list:
    # Arguments of datagenerator.generator:
    #   la: ratio of labeled anomalies, from 0.0 to 1.0
    #   realistic_synthetic_mode: types of synthetic anomalies, can be local,
    #       global, dependency or cluster
    #   noise_type: inject data noises for testing model robustness, can be
    #       duplicated_anomalies, irrelevant_features or label_contamination

    # import the dataset
    datagenerator.dataset = dataset  # specify the dataset name
    data = datagenerator.generator(la=0.1, realistic_synthetic_mode=None, noise_type=None)

    for name, clf in model_dict.items():
        # Reset the per-model outputs so that a failure below can never leave
        # the previous model's timings or metrics in these variables.
        duracion_train = float('nan')
        duracion_infer = float('nan')
        result = {'aucroc': float('nan'), 'aucpr': float('nan')}

        # model initialization.
        # You can make special cases of AD algorithms (in this case, VAE) to
        # tune hyperparameters:
        is_vae = name == 'VAE'
        if is_vae:
            clf = VAE(encoder_neurons=[64, 32, 1], decoder_neurons=[1, 32, 64])
        else:
            clf = clf(seed=seed, model_name=name)

        # `stage` only serves to say where a failure happened.
        stage = 'training'
        try:
            # training, for unsupervised models the y label will be discarded
            start_train = time.time()
            clf = clf.fit(data['X_train'], data['y_train'])
            duracion_train = time.time() - start_train

            # output predicted anomaly score on testing set
            stage = 'inference'
            start_infer = time.time()
            if is_vae:
                # The VAE instance is scored through decision_function,
                # the other wrappers through predict_score.
                score = clf.decision_function(pd.DataFrame(data['X_test']))
            else:
                score = clf.predict_score(data['X_test'])
            duracion_infer = time.time() - start_infer

            # evaluation
            stage = 'evaluation'
            result = utils.metric(y_true=data['y_test'], y_score=score)
        except Exception as exc:
            # Best effort: keep benchmarking the remaining models, but report
            # the failure instead of silently swallowing it. KeyboardInterrupt
            # is deliberately not caught so the run can still be aborted.
            print(f"[{dataset}] {name} failed during {stage}: {exc!r}")

        # save results (NaN for every stage that did not complete)
        df_AUCROC.loc[dataset, name] = result['aucroc']
        df_AUCPR.loc[dataset, name] = result['aucpr']
        df_TIMETRAIN.loc[dataset, name] = duracion_train
        df_TIMEINFER.loc[dataset, name] = duracion_infer

In [None]:
# Display the AUC-ROC results: one row per dataset, one column per model.
df_AUCROC

In [None]:
# Display the AUC-PR results: one row per dataset, one column per model.
df_AUCPR

In [None]:
# Display the training times (seconds, measured with time.time()) per dataset and model.
df_TIMETRAIN

In [None]:
# Display the inference times (seconds, measured with time.time()) per dataset and model.
df_TIMEINFER