# Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, make_scorer
from deepod.metrics import tabular_metrics

from tqdm import tqdm
from time import time

In [3]:
X_train = np.load('data/x_train.npy')
y_train = np.load('data/y_train.npy')

X_test = np.load('data/x_test.npy')
y_test = np.load('data/y_test.npy')

#X_val = np.load('data/x_val.npy')
#y_val = np.load('data/y_val.npy')

In [4]:
#X_train = np.concatenate((X_train, X_val), axis=0)
#y_train = np.concatenate((y_train, y_val), axis=0)

# Playground

In [5]:
np.unique(y_train, return_counts=True)

(array([0, 1, 2, 3, 4]), array([   166,    210,  62760,   6637, 885235]))

4 is normal class

In [6]:
y_test_binary = np.where(y_test == 4, 'normal', 'anomaly')

In [7]:
normal_data = X_train[y_train == 4] # normal class

In [8]:
len(normal_data)

885235

# Helpers

In [9]:
def evaluate_model(model, y_test, y_pred):
    positive_class = "normal"
    
    accuracy = accuracy_score(y_test, y_pred)
    
    precision = precision_score(y_test, y_pred, pos_label=positive_class)
    recall = recall_score(y_test, y_pred, pos_label=positive_class)
    f1 = f1_score(y_test, y_pred, pos_label=positive_class)
    
    metrics = {
        'model': [model],
        'accuracy': [accuracy],
        'precision': [precision],
        'recall': [recall],
        'f1': [f1],
    }
    
    return metrics

# DIF

In [11]:
from deepod.models.tabular import DeepIsolationForest

In [12]:
clf_dif = DeepIsolationForest()
clf_dif.fit(normal_data, y=None)
y_preds_dif = clf_dif.predict(X_test)

Start Training...


100%|██████████| 50/50 [06:41<00:00,  8.03s/it]


Start Inference on the training data...
Start Inference...


100%|██████████| 50/50 [17:23<00:00, 20.88s/it]


Start Inference...


100%|██████████| 50/50 [04:26<00:00,  5.34s/it]


In [17]:
y_preds_dif_binary = np.where(y_preds_dif == 0, 'normal', 'anomaly')

In [18]:
results_dif = evaluate_model("DIF", y_test_binary, y_preds_dif_binary)
results_dif

{'model': ['DIF'],
 'accuracy': [0.9277872948193321],
 'precision': [0.9277872948193321],
 'recall': [1.0],
 'f1': [0.9625411447752925]}

# SLAD

In [9]:
from deepod.models.tabular import SLAD

In [10]:
clf_slad = SLAD()
clf_slad.fit(normal_data, y=None)
y_preds_slad = clf_slad.predict(X_test)

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 41, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41]
epoch  1, training loss: 0.506606, time: 18.9s
epoch 10, training loss: 0.503751, time: 17.9s
epoch 20, training loss: 0.503722, time: 19.0s
epoch 30, training loss: 0.503705, time: 18.3s
epoch 40, training loss: 0.503693, time: 17.9s
epoch 50, training loss: 0.503690, time: 17.9s
epoch 60, training loss: 0.503697, time: 18.1s
epoch 70, training loss: 0.503689, time: 18.1s
epoch 80, training loss: 0.503688, time: 18.3s
epoch 90, training loss: 0.503690, time: 18.1s
epoch100, training loss: 0.503688, time: 18.2s
Start Inference on the training data...


In [11]:
y_preds_slad_binary = np.where(y_preds_slad == 0, 'normal', 'anomaly')

In [13]:
results_slad = pd.DataFrame(evaluate_model("SLAD", y_test_binary, y_preds_slad_binary))
results_slad

Unnamed: 0,model,accuracy,precision,recall,f1
0,SLAD,0.910636,1.0,0.90368,0.949403


# ICL

In [9]:
from deepod.models.tabular import ICL

In [10]:
clf_icl = ICL(epochs=5)
clf_icl.fit(normal_data, y=None)
y_preds_icl = clf_icl.predict(X_test)

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=

testing: 100%|██████████| 13832/13832 [00:24<00:00, 559.60it/s]
testing: 100%|██████████| 13832/13832 [00:24<00:00, 562.10it/s]
testing: 100%|██████████| 3731/3731 [00:06<00:00, 582.08it/s]
testing: 100%|██████████| 3731/3731 [00:06<00:00, 586.22it/s]


In [11]:
y_preds_icl_binary = np.where(y_preds_icl == 0, 'normal', 'anomaly')

In [12]:
results_icl = pd.DataFrame(evaluate_model("ICL", y_test_binary, y_preds_icl_binary))
results_icl

Unnamed: 0,model,accuracy,precision,recall,f1
0,ICL,0.876668,0.965714,0.898985,0.931156


# NeuTraL

In [13]:
from deepod.models.tabular import NeuTraL

In [14]:
clf_neutral = NeuTraL(epochs=1)
clf_neutral.fit(normal_data, y=None)
y_preds_neutral = clf_neutral.predict(X_test)

Start Training...
ensemble size: 1
epoch  1, training loss: 0.134671, time: 106.3s
Start Inference on the training data...


RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 10.75 GiB total capacity; 9.35 GiB already allocated; 2.69 MiB free; 9.96 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
y_preds_neutral_binary = np.where(y_preds_neutral == 0, 'normal', 'anomaly')

In [None]:
results_neutral = pd.DataFrame(evaluate_model("NeuTraL", y_test_binary, y_preds_neutral_binary))
results_neutral

# GOAD

# RCA

In [10]:
from deepod.models.tabular import RCA

In [11]:
clf_rca = RCA(epochs=10)
clf_rca.fit(normal_data, y=None)
y_preds_rca = clf_rca.predict(X_test)

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=41, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=41, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals

100%|██████████| 10/10 [02:01<00:00, 12.19s/it]
100%|██████████| 10/10 [00:32<00:00,  3.28s/it]


In [12]:
y_preds_rca_binary = np.where(y_preds_rca == 0, 'normal', 'anomaly')

In [13]:
results_rca = pd.DataFrame(evaluate_model("RCA", y_test_binary, y_preds_rca_binary))
results_rca

Unnamed: 0,model,accuracy,precision,recall,f1
0,RCA,0.907327,0.99998,0.900132,0.947432


# RDP

In [14]:
from deepod.models.tabular import RDP

In [15]:
clf_rdp = RDP(epochs=10)
clf_rdp.fit(normal_data, y=None)
y_preds_rdp = clf_rdp.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=41, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000004, time: 39.2s
epoch 10, training loss: 0.000004, time: 40.1s
Start Inference on the training data...


testing: 100%|██████████| 13832/13832 [00:13<00:00, 1043.60it/s]
testing: 100%|██████████| 3731/3731 [00:03<00:00, 1063.66it/s]


In [16]:
y_preds_rdp_binary = np.where(y_preds_rdp == 0, 'normal', 'anomaly')

In [17]:
results_rdp = pd.DataFrame(evaluate_model("RDP", y_test_binary, y_preds_rdp_binary))
results_rdp

Unnamed: 0,model,accuracy,precision,recall,f1
0,RDP,0.879687,1.0,0.870323,0.930666


# REPEN

In [18]:
from deepod.models.tabular import REPEN

In [19]:
clf_repen = REPEN(epochs=1)
clf_repen.fit(normal_data, y=None)
y_preds_repdn = clf_repen.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=41, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)


KeyboardInterrupt: 

In [None]:
y_preds_repen_binary = np.where(y_preds_repdn == 0, 'normal', 'anomaly')

In [None]:
results_repen = pd.DataFrame(evaluate_model("REPEN", y_test_binary, y_preds_repen_binary))
results_repen

# Deep SVDD

In [20]:
from deepod.models.tabular import DeepSVDD

In [27]:
clf_deepsvdd = DeepSVDD(epochs=1)
clf_deepsvdd.fit(normal_data, y=None)
y_preds_deepsvdd = clf_deepsvdd.predict(X_test)

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=41, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000710, time: 15.2s
Start Inference on the training data...


testing: 100%|██████████| 13832/13832 [00:05<00:00, 2522.36it/s]
testing: 100%|██████████| 3731/3731 [00:01<00:00, 2541.90it/s]


In [28]:
y_preds_deepsvdd_binary = np.where(y_preds_deepsvdd == 0, 'normal', 'anomaly')

In [29]:
results_deepsvdd = pd.DataFrame(evaluate_model("DeepSVDD", y_test_binary, y_preds_deepsvdd_binary))
results_deepsvdd

Unnamed: 0,model,accuracy,precision,recall,f1
0,DeepSVDD,0.908144,0.999735,0.901233,0.947932


# Results

In [None]:
results_df = pd.concat([results_dif, results_slad], ignore_index=True)
results_df.to_csv('results/DeepOD_results.csv', index=False)

results_df