# Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from deepod.metrics import tabular_metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from tqdm import tqdm
from time import time

In [3]:
X_train = np.load('data/x_train.npy')
y_train = np.load('data/y_train.npy')

X_test = np.load('data/x_test.npy')
y_test = np.load('data/y_test.npy')

X_val = np.load('data/x_val.npy')
y_val = np.load('data/y_val.npy')

In [4]:
X_train = np.concatenate((X_train, X_val), axis=0)
y_train = np.concatenate((y_train, y_val), axis=0)

# Playground

In [5]:
np.unique(y_train, return_counts=True)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18]),
 array([ 37592,   2279,   1674,  18202,  17706,     23,   4058,  30357,
         16645,     90,   4762, 324955,  61707,    812,  25292,   1693,
           367,    916,  11789]))

11 is normal class

In [6]:
y_test_binary = np.where(y_test == 11, 'normal', 'anomaly')

In [7]:
normal_data = X_train[y_train == 11] # 11 is normal class

In [8]:
len(normal_data)

324955

# Helpers

In [9]:
def evaluate_model(model, y_test, y_pred):
    positive_class = "normal"
    
    accuracy = accuracy_score(y_test, y_pred)
    
    precision = precision_score(y_test, y_pred, pos_label=positive_class)
    recall = recall_score(y_test, y_pred, pos_label=positive_class)
    f1 = f1_score(y_test, y_pred, pos_label=positive_class)
    
    metrics = {
        'model': [model],
        'accuracy': [accuracy],
        'precision': [precision],
        'recall': [recall],
        'f1': [f1],
    }
    
    return metrics

# DIF

In [8]:
from deepod.models.tabular import DeepIsolationForest

In [11]:
clf_dif = DeepIsolationForest()
clf_dif.fit(normal_data, y=None)
y_preds_dif = clf_dif.predict(X_test)

Start Training...


100%|██████████| 50/50 [02:49<00:00,  3.40s/it]


Start Inference on the training data...
Start Inference...


100%|██████████| 50/50 [11:17<00:00, 13.55s/it]


Start Inference...


100%|██████████| 50/50 [04:24<00:00,  5.29s/it]


In [40]:
y_preds_dif_binary = np.where(y_preds_dif == 0, 'normal', 'anomaly')

In [57]:
results_dif = pd.DataFrame(evaluate_model("DIF", y_test_binary, y_preds_dif_binary))
results_dif

Unnamed: 0,model,accuracy,precision,recall,f1
0,DIF,0.866398,0.86819,0.907101,0.887219


# SLAD

In [42]:
from deepod.models.tabular import SLAD

In [43]:
clf_slad = SLAD()
clf_slad.fit(normal_data, y=None)
y_preds_slad = clf_slad.predict(X_test)

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 11 12 13 14 16 17 18 19 20 22 23 24 25 26 27
 28 30 31 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55
 56 57]
epoch  1, training loss: 0.646864, time: 7.6s
epoch 10, training loss: 0.641175, time: 6.9s
epoch 20, training loss: 0.641144, time: 7.0s
epoch 30, training loss: 0.641133, time: 6.9s
epoch 40, training loss: 0.641124, time: 6.9s
epoch 50, training loss: 0.641126, time: 6.8s
epoch 60, training loss: 0.641109, time: 6.9s
epoch 70, training loss: 0.641107, time: 6.9s
epoch 80, training loss: 0.641108, time: 6.8s
epoch 90, training loss: 0.641101, time: 6.9s
epoch100, training loss: 0.641104, time: 6.9s
Start Inference on the training data...


In [51]:
y_preds_slad_binary = np.where(y_preds_slad == 0, 'normal', 'anomaly')

In [56]:
results_slad = pd.DataFrame(evaluate_model("SLAD", y_test_binary, y_preds_slad_binary))
results_slad

Unnamed: 0,model,accuracy,precision,recall,f1
0,SLAD,0.880468,0.895327,0.898743,0.897032


# ICL

In [11]:
from deepod.models.tabular import ICL

In [22]:
clf_icl = ICL(epochs=5)
clf_icl.fit(normal_data, y=None)
y_preds_icl = clf_icl.predict(X_test)

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=

testing: 100%|██████████| 5078/5078 [00:08<00:00, 573.49it/s]
testing: 100%|██████████| 5078/5078 [00:08<00:00, 584.94it/s]
testing: 100%|██████████| 2192/2192 [00:03<00:00, 562.98it/s]
testing: 100%|██████████| 2192/2192 [00:03<00:00, 557.01it/s]


In [23]:
y_preds_icl_binary = np.where(y_preds_icl == 0, 'normal', 'anomaly')

In [24]:
results_icl = pd.DataFrame(evaluate_model("ICL", y_test_binary, y_preds_icl_binary))
results_icl

Unnamed: 0,model,accuracy,precision,recall,f1
0,ICL,0.881858,0.895635,0.90107,0.898344


# NeuTraL

In [25]:
from deepod.models.tabular import NeuTraL

In [32]:
clf_neutral = NeuTraL(epochs=1)
clf_neutral.fit(normal_data, y=None)
y_preds_neutral = clf_neutral.predict(X_test)

Start Training...
ensemble size: 1
epoch  1, training loss: 0.135441, time: 35.9s
Start Inference on the training data...


In [33]:
y_preds_neutral_binary = np.where(y_preds_neutral == 0, 'normal', 'anomaly')

In [34]:
results_neutral = pd.DataFrame(evaluate_model("NeuTraL", y_test_binary, y_preds_neutral_binary))
results_neutral

Unnamed: 0,model,accuracy,precision,recall,f1
0,NeuTraL,0.901334,0.92782,0.899679,0.913533


# GOAD

# RCA

In [10]:
from deepod.models.tabular import RCA

In [17]:
clf_rca = RCA(epochs=10)
clf_rca.fit(normal_data, y=None)
y_preds_rca = clf_rca.predict(X_test)

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals

100%|██████████| 10/10 [00:45<00:00,  4.59s/it]
100%|██████████| 10/10 [00:20<00:00,  2.00s/it]


In [18]:
y_preds_rca_binary = np.where(y_preds_rca == 0, 'normal', 'anomaly')

In [19]:
results_rca = pd.DataFrame(evaluate_model("RCA", y_test_binary, y_preds_rca_binary))
results_rca

Unnamed: 0,model,accuracy,precision,recall,f1
0,RCA,0.848934,0.848958,0.899223,0.873368


# RDP

In [20]:
from deepod.models.tabular import RDP

In [27]:
clf_rdp = RDP(epochs=10)
clf_rdp.fit(normal_data, y=None)
y_preds_rdp = clf_rdp.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000019, time: 17.7s
epoch 10, training loss: 0.000020, time: 17.9s
Start Inference on the training data...


testing: 100%|██████████| 5078/5078 [00:04<00:00, 1060.20it/s]
testing: 100%|██████████| 2192/2192 [00:02<00:00, 1030.77it/s]


In [28]:
y_preds_rdp_binary = np.where(y_preds_rdp == 0, 'normal', 'anomaly')

In [29]:
results_rdp = pd.DataFrame(evaluate_model("RDP", y_test_binary, y_preds_rdp_binary))
results_rdp

Unnamed: 0,model,accuracy,precision,recall,f1
0,RDP,0.884133,0.902058,0.897438,0.899742


# REPEN

In [30]:
from deepod.models.tabular import REPEN

In [32]:
clf_repen = REPEN(epochs=1)
clf_repen.fit(normal_data, y=None)
y_preds_repdn = clf_repen.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 13.316550, time: 974.8s
Start Inference on the training data...


testing: 100%|██████████| 5078/5078 [00:01<00:00, 2931.16it/s]
testing: 100%|██████████| 2192/2192 [00:00<00:00, 2749.33it/s]


In [34]:
y_preds_repen_binary = np.where(y_preds_repdn == 0, 'normal', 'anomaly')

In [35]:
results_repen = pd.DataFrame(evaluate_model("REPEN", y_test_binary, y_preds_repen_binary))
results_repen

Unnamed: 0,model,accuracy,precision,recall,f1
0,REPEN,0.755616,0.73428,0.90603,0.811164


# Deep SVDD

In [36]:
from deepod.models.tabular import DeepSVDD

In [46]:
clf_deepsvdd = DeepSVDD(epochs=1)
clf_deepsvdd.fit(normal_data, y=None)
y_preds_deepsvdd = clf_deepsvdd.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.001419, time: 5.4s
Start Inference on the training data...


testing: 100%|██████████| 5078/5078 [00:01<00:00, 2651.14it/s]
testing: 100%|██████████| 2192/2192 [00:00<00:00, 2511.73it/s]


In [47]:
y_preds_deepsvdd_binary = np.where(y_preds_deepsvdd == 0, 'normal', 'anomaly')

In [48]:
results_deepsvdd = pd.DataFrame(evaluate_model("DeepSVDD", y_test_binary, y_preds_deepsvdd_binary))
results_deepsvdd

Unnamed: 0,model,accuracy,precision,recall,f1
0,DeepSVDD,0.795564,0.780753,0.89979,0.836055


# Results

In [58]:
results_df = pd.concat([results_dif, results_slad], ignore_index=True)
results_df.to_csv('results/DeepOD_results.csv', index=False)

results_df

Unnamed: 0,model,accuracy,precision,recall,f1
0,DIF,0.866398,0.86819,0.907101,0.887219
1,SLAD,0.880468,0.895327,0.898743,0.897032
