In [1]:
from multi_modal_edge_ai.models.anomaly_detection.data_access.parser import parse_file_with_idle
from multi_modal_edge_ai.models.anomaly_detection.ml_models.local_outlier_factor import LOF
from multi_modal_edge_ai.models.anomaly_detection.ml_models.isolation_forest import IForest
from multi_modal_edge_ai.models.anomaly_detection.ml_models.one_class_svm import OCSVM
from multi_modal_edge_ai.models.anomaly_detection.ml_models.autoencoder import Autoencoder
from multi_modal_edge_ai.models.anomaly_detection.ml_models.lstm_autoencoder import LSTMAutoencoder
from multi_modal_edge_ai.models.anomaly_detection.train_and_eval.model_validator import model_train_eval
from multi_modal_edge_ai.models.anomaly_detection.train_and_eval.hyperparameter_config import HyperparameterConfig as Hparams
import torch.nn as nn


# Parse the Aruba CSV once; the model cells below all pass `data` to model_train_eval.
# The path is relative, so it is resolved against the kernel's working directory.
data = parse_file_with_idle(
    "../../public_datasets/Aruba_Idle_Squashed.csv"
)

In [2]:
# Basic LOF model with changed hyperparameters.
# novelty=True makes the model use LocalOutlierFactor for novelty detection, which assumes the training data is outlier-free.

lof = LOF()

# Hyperparameters handed to the LOF wrapper through Hparams(lof_hparams=...).
# The keys look like scikit-learn LocalOutlierFactor arguments -- TODO confirm
# that the wrapper forwards them unchanged.
lofParams = {
    "n_neighbors": 300,
    "algorithm": "auto",
    "leaf_size": 40,
    "metric": "minkowski",
    "p": 2,
    "contamination": "auto",
    "novelty": True
}

hp = Hparams(lof_hparams=lofParams, clean_test_data_ratio=0.1,
             window_size=8, window_slide=1, one_hot=False)
(avg, cm) = model_train_eval(lof, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy: ", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)


clean_df: 8890 anomalous_df: 729
5
0.5905411163187048
[[1376   82]
 [ 879   10]]
Accuracy:  0.5905411163187048
Precision:  0.943758573388203
Recall:  0.61019955654102
F1:  0.7411796391058443


In [3]:
# LOF with a much larger neighbourhood (4000), larger leaves (400) and a fixed
# contamination of 0.001. No batch size is involved in this run.
# novelty=True makes the model use LocalOutlierFactor for novelty detection,
# which assumes the training data is outlier-free.

lof = LOF()

# Hyperparameters handed to the LOF wrapper through Hparams(lof_hparams=...).
lofParams = {
    "n_neighbors": 4000,
    "algorithm": "auto",
    "leaf_size": 400,
    "metric": "minkowski",
    "p": 2,
    "contamination": 0.001,
    "novelty": True
}

# one_hot is not passed here, so the Hparams default applies
# (the other LOF cells pass one_hot=False explicitly).
hp = Hparams(lof_hparams=lofParams, clean_test_data_ratio=0.15,
             window_size=8, window_slide=1)
(avg, cm) = model_train_eval(lof, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy: ", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)

clean_df: 8890 anomalous_df: 729
12
0.5069867431028305
[[ 111 1347]
 [  29 1304]]
Accuracy:  0.5069867431028305
Precision:  0.07613168724279835
Recall:  0.7928571428571428
F1:  0.13892365456821026


In [4]:
# LOF with 2000 neighbours, leaf size 300 and automatic contamination, run with
# one_hot=False. No batch size is involved in this run.
# novelty=True makes the model use LocalOutlierFactor for novelty detection,
# which assumes the training data is outlier-free.

lof = LOF()

# Hyperparameters handed to the LOF wrapper through Hparams(lof_hparams=...).
lofParams = {
    "n_neighbors": 2000,
    "algorithm": "auto",
    "leaf_size": 300,
    "metric": "minkowski",
    "p": 2,
    "contamination": "auto",
    "novelty": True
}

hp = Hparams(lof_hparams=lofParams, clean_test_data_ratio=0.15,
             window_size=8, window_slide=1, one_hot=False)
(avg, cm) = model_train_eval(lof, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy: ", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)

clean_df: 8890 anomalous_df: 729
5
0.5206019347903977
[[ 166 1292]
 [  46 1287]]
Accuracy:  0.5206019347903977
Precision:  0.11385459533607682
Recall:  0.7830188679245284
F1:  0.19880239520958085


In [3]:
# Isolation Forest run; one_hot is not passed, so the Hparams default applies.
iforest = IForest()

# Hyperparameters handed to the IForest wrapper through
# Hparams(i_forest_hparams=...). random_state is pinned to 42.
iforestParams = {
    "n_estimators": 1000,
    "max_samples": "auto",
    "contamination": 0.1,
    "max_features": 1.0,
    "bootstrap": False,
    "n_jobs": -1,
    "random_state": 42,
    "verbose": 0,
}

hp = Hparams(i_forest_hparams=iforestParams, window_size=8, window_slide=1)
(avg, cm) = model_train_eval(iforest, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy: ", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)

clean_df: 8890 anomalous_df: 729
12


KeyboardInterrupt: 

In [None]:
# Isolation Forest run with one_hot=False.
iforest = IForest()

# Hyperparameters handed to the IForest wrapper through
# Hparams(i_forest_hparams=...). random_state is pinned to 42.
iforestParams = {
    "n_estimators": 1000,
    "max_samples": "auto",
    "contamination": 0.1,
    "max_features": 1.0,
    "bootstrap": False,
    "n_jobs": -1,
    "random_state": 42,
    "verbose": 0,
}

hp = Hparams(i_forest_hparams=iforestParams, window_size=8, window_slide=1,
             one_hot=False)
(avg, cm) = model_train_eval(iforest, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy: ", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)

In [4]:
# One-class SVM run with an RBF kernel and one_hot=False.
ocsvm = OCSVM()

# Hyperparameters handed to the OCSVM wrapper through Hparams(ocsvm_hparams=...).
# The keys look like scikit-learn OneClassSVM arguments -- TODO confirm that the
# wrapper forwards them unchanged.
ovscmParams = {
    "kernel": "rbf",
    "degree": 3,
    "gamma": "scale",
    "coef0": 0.0,
    "tol": 0.001,
    "nu": 0.01,
    "shrinking": True,
    "cache_size": 2400,
    "verbose": False,
    "max_iter": -1,
}

hp = Hparams(ocsvm_hparams=ovscmParams, anomaly_generation_ratio=11,
             clean_test_data_ratio=0.25, window_size=8, window_slide=1,
             one_hot=False)
(avg, cm) = model_train_eval(ocsvm, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy: ", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)


clean_df: 8890 anomalous_df: 729
5
[1 0 1 ... 1 1 0]
[1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

In [None]:
# Dense autoencoder with mirrored layer lists, 96 -> 16 and 16 -> 96.
# NOTE(review): presumably encoder/decoder layer widths, with 96 matching
# window_size (8) times the per-event feature count -- confirm against
# Autoencoder's signature.
autoenc = Autoencoder([96, 64, 32, 16], [16, 32, 64, 96], nn.ReLU(), nn.Sigmoid())

# one_hot is not passed here, so the Hparams default applies.
hp = Hparams(batch_size=16, n_epochs=10, anomaly_generation_ratio=11,
             clean_test_data_ratio=0.2, window_size=8, window_slide=1)
(avg, cm) = model_train_eval(autoenc, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy:", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)

In [9]:
# Dense autoencoder with mirrored layer lists, 40 -> 8 and 8 -> 40, run with
# one_hot=False.
# NOTE(review): presumably encoder/decoder layer widths, with 40 matching
# window_size (8) times the per-event feature count -- confirm against
# Autoencoder's signature.
autoenc = Autoencoder([40, 32, 16, 8], [8, 16, 32, 40], nn.ReLU(), nn.Sigmoid())

hp = Hparams(batch_size=32, n_epochs=16, clean_test_data_ratio=0.2,
             window_size=8, window_slide=1, one_hot=False)
(avg, cm) = model_train_eval(autoenc, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy:", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)

clean_df: 8890 anomalous_df: 729
5
[0 0 0 ... 1 0 1]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [None]:
# LSTM autoencoder run with one_hot=True.
# NOTE(review): the five positional arguments are opaque from here; consider
# passing them by keyword once LSTMAutoencoder's signature is confirmed.
ltsm = LSTMAutoencoder(12, 8, 8, 16, 16)

hp = Hparams(batch_size=16, n_epochs=32, clean_test_data_ratio=0.2,
             window_size=8, window_slide=1, one_hot=True)

(avg, cm) = model_train_eval(ltsm, data, hp)
print(avg)
print(cm)

# Index 1 is treated as the positive class.
# NOTE(review): assumes cm is laid out as [[tn, fp], [fn, tp]] -- confirm
# against model_train_eval's label encoding (which label marks an anomaly).
tn, fp = cm[0][0], cm[0][1]
fn, tp = cm[1][0], cm[1][1]

# A model that never predicts the positive class makes a denominator zero;
# report NaN for that metric instead of dividing by zero.
undefined = float("nan")
total = tp + tn + fp + fn
print("Accuracy:", (tp + tn) / total if total else undefined)
print("Precision: ", tp / (tp + fp) if tp + fp else undefined)
print("Recall: ", tp / (tp + fn) if tp + fn else undefined)
print("F1: ", 2 * tp / (2 * tp + fp + fn) if 2 * tp + fp + fn else undefined)


clean_df: 8890 anomalous_df: 729
12
Epoch 1 training loss: 0.07760713093037244
Epoch 2 training loss: 0.07725755977669586
Epoch 3 training loss: 0.0772386808702628
Epoch 4 training loss: 0.07725333552849474
Epoch 5 training loss: 0.07720437902104449
Epoch 6 training loss: 0.07721456329896627
Epoch 7 training loss: 0.0772136117869106
Epoch 8 training loss: 0.07721375107463486
Epoch 9 training loss: 0.07718426470631153
Epoch 10 training loss: 0.07720113484895001
Epoch 11 training loss: 0.07719907346490616
Epoch 12 training loss: 0.07718318898107174
Epoch 13 training loss: 0.07717363352660002
Epoch 14 training loss: 0.07718654360969973
Epoch 15 training loss: 0.07717847603837319
Epoch 16 training loss: 0.07719744948766648
Epoch 17 training loss: 0.07721296846458041
Epoch 18 training loss: 0.07720602415680164
Epoch 19 training loss: 0.07716414992486968
Epoch 20 training loss: 0.07716112782344792
Epoch 21 training loss: 0.07718445156324819
Epoch 22 training loss: 0.07716832268737986
Epoch 2