# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, make_scorer
from deepod.metrics import tabular_metrics

from tqdm import tqdm
from time import time

In [2]:
X_train = np.load('data/x_train.npy')
y_train = np.load('data/y_train.npy')

X_test = np.load('data/x_test.npy')
y_test = np.load('data/y_test.npy')

#X_val = np.load('data/x_val.npy')
#y_val = np.load('data/y_val.npy')

In [3]:
#X_train = np.concatenate((X_train, X_val), axis=0)
#y_train = np.concatenate((y_train, y_val), axis=0)

# Playground

In [4]:
np.unique(y_train, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([ 2142,  1863, 13082, 35620, 19397, 47097, 74400, 11189,  1209,
          139]))

6 is normal class

In [5]:
y_test_binary = np.where(y_test == 6, 'normal', 'anomaly')

In [6]:
normal_data = X_train[y_train == 6] # normal class

In [7]:
len(normal_data)

74400

# Helpers

In [8]:
def evaluate_model(model, y_test, y_pred):
    positive_class = "normal"
    
    accuracy = accuracy_score(y_test, y_pred)
    
    precision = precision_score(y_test, y_pred, pos_label=positive_class)
    recall = recall_score(y_test, y_pred, pos_label=positive_class)
    f1 = f1_score(y_test, y_pred, pos_label=positive_class)
    
    metrics = {
        'model': [model],
        'accuracy': [accuracy],
        'precision': [precision],
        'recall': [recall],
        'f1': [f1],
    }
    
    return metrics

# DIF

In [9]:
from deepod.models.tabular import DeepIsolationForest

In [10]:
clf_dif = DeepIsolationForest(epochs=50)
clf_dif.fit(normal_data, y=None)
y_preds_dif = clf_dif.predict(X_test)

Start Training...


100%|██████████| 50/50 [00:31<00:00,  1.59it/s]


Start Inference on the training data...
Start Inference...


100%|██████████| 50/50 [02:30<00:00,  3.01s/it]


Start Inference...


100%|██████████| 50/50 [01:39<00:00,  1.99s/it]


In [14]:
y_preds_dif_binary = np.where(y_preds_dif == 0, 'normal', 'anomaly')

In [15]:
results_dif = pd.DataFrame(evaluate_model("DIF", y_test_binary, y_preds_dif_binary))
results_dif

Unnamed: 0,model,accuracy,precision,recall,f1
0,DIF,0.581255,0.455126,0.812473,0.58343


# SLAD

In [16]:
from deepod.models.tabular import SLAD

In [20]:
clf_slad = SLAD(epochs=50)
clf_slad.fit(normal_data, y=None)
y_preds_slad = clf_slad.predict(X_test)

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 12 13 14 16 17 18 20 22 24 25 26 27 28 30 31
 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56 57 58
 59 60]
epoch  1, training loss: 0.697231, time: 3.2s
epoch 10, training loss: 0.680002, time: 3.4s
epoch 20, training loss: 0.679506, time: 3.4s
epoch 30, training loss: 0.679260, time: 3.2s
epoch 40, training loss: 0.679102, time: 3.4s
epoch 50, training loss: 0.679479, time: 3.4s
Start Inference on the training data...


In [21]:
y_preds_slad_binary = np.where(y_preds_slad == 0, 'normal', 'anomaly')

In [22]:
results_slad = pd.DataFrame(evaluate_model("SLAD", y_test_binary, y_preds_slad_binary))
results_slad

Unnamed: 0,model,accuracy,precision,recall,f1
0,SLAD,0.83258,0.711558,0.901613,0.79539


# ICL

In [9]:
from deepod.models.tabular import ICL

In [10]:
clf_icl = ICL(epochs=5)
clf_icl.fit(normal_data, y=None)
y_preds_icl = clf_icl.predict(X_test)

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=

testing: 100%|██████████| 1163/1163 [00:01<00:00, 605.59it/s]
testing: 100%|██████████| 1163/1163 [00:01<00:00, 613.78it/s]
testing: 100%|██████████| 806/806 [00:01<00:00, 615.98it/s]
testing: 100%|██████████| 806/806 [00:01<00:00, 610.48it/s]


In [11]:
y_preds_icl_binary = np.where(y_preds_icl == 0, 'normal', 'anomaly')

In [12]:
results_icl = pd.DataFrame(evaluate_model("ICL", y_test_binary, y_preds_icl_binary))
results_icl

Unnamed: 0,model,accuracy,precision,recall,f1
0,ICL,0.863568,0.763976,0.900054,0.826451


# NeuTraL

In [13]:
from deepod.models.tabular import NeuTraL

In [14]:
clf_neutral = NeuTraL(epochs=1)
clf_neutral.fit(normal_data, y=None)
y_preds_neutral = clf_neutral.predict(X_test)

Start Training...
ensemble size: 1
epoch  1, training loss: 0.134671, time: 106.3s
Start Inference on the training data...


RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 10.75 GiB total capacity; 9.35 GiB already allocated; 2.69 MiB free; 9.96 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
y_preds_neutral_binary = np.where(y_preds_neutral == 0, 'normal', 'anomaly')

In [None]:
results_neutral = pd.DataFrame(evaluate_model("NeuTraL", y_test_binary, y_preds_neutral_binary))
results_neutral

# GOAD

# RCA

In [13]:
from deepod.models.tabular import RCA

In [31]:
clf_rca = RCA(epochs=10)
clf_rca.fit(normal_data, y=None)
y_preds_rca = clf_rca.predict(X_test)

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals

100%|██████████| 10/10 [00:09<00:00,  1.02it/s]
100%|██████████| 10/10 [00:07<00:00,  1.28it/s]


In [32]:
y_preds_rca_binary = np.where(y_preds_rca == 0, 'normal', 'anomaly')

In [33]:
results_rca = pd.DataFrame(evaluate_model("RCA", y_test_binary, y_preds_rca_binary))
results_rca

Unnamed: 0,model,accuracy,precision,recall,f1
0,RCA,0.556845,0.444045,0.904086,0.595573


# RDP

In [17]:
from deepod.models.tabular import RDP

In [18]:
clf_rdp = RDP(epochs=10)
clf_rdp.fit(normal_data, y=None)
y_preds_rdp = clf_rdp.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000038, time: 3.7s
epoch 10, training loss: 0.000051, time: 3.7s
Start Inference on the training data...


testing: 100%|██████████| 1163/1163 [00:01<00:00, 1126.64it/s]
testing: 100%|██████████| 806/806 [00:00<00:00, 1125.93it/s]


In [19]:
y_preds_rdp_binary = np.where(y_preds_rdp == 0, 'normal', 'anomaly')

In [20]:
results_rdp = pd.DataFrame(evaluate_model("RDP", y_test_binary, y_preds_rdp_binary))
results_rdp

Unnamed: 0,model,accuracy,precision,recall,f1
0,RDP,0.831086,0.709294,0.901452,0.793911


# REPEN

In [18]:
from deepod.models.tabular import REPEN

In [19]:
clf_repen = REPEN(epochs=1)
clf_repen.fit(normal_data, y=None)
y_preds_repdn = clf_repen.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=41, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)


KeyboardInterrupt: 

In [None]:
y_preds_repen_binary = np.where(y_preds_repdn == 0, 'normal', 'anomaly')

In [None]:
results_repen = pd.DataFrame(evaluate_model("REPEN", y_test_binary, y_preds_repen_binary))
results_repen

# Deep SVDD

In [21]:
from deepod.models.tabular import DeepSVDD

In [22]:
clf_deepsvdd = DeepSVDD(epochs=1)
clf_deepsvdd.fit(normal_data, y=None)
y_preds_deepsvdd = clf_deepsvdd.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.009288, time: 1.3s
Start Inference on the training data...


testing: 100%|██████████| 1163/1163 [00:00<00:00, 2543.64it/s]
testing: 100%|██████████| 806/806 [00:00<00:00, 2533.19it/s]


In [23]:
y_preds_deepsvdd_binary = np.where(y_preds_deepsvdd == 0, 'normal', 'anomaly')

In [24]:
results_deepsvdd = pd.DataFrame(evaluate_model("DeepSVDD", y_test_binary, y_preds_deepsvdd_binary))
results_deepsvdd

Unnamed: 0,model,accuracy,precision,recall,f1
0,DeepSVDD,0.808227,0.675287,0.902742,0.772622


# Results

In [23]:
results_df = pd.concat([results_dif, results_slad], ignore_index=True)
results_df.to_csv('results/DeepOD_results.csv', index=False)

results_df

Unnamed: 0,model,accuracy,precision,recall,f1
0,DIF,0.581255,0.455126,0.812473,0.58343
1,SLAD,0.83258,0.711558,0.901613,0.79539
