In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import sklearn
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import GridSearchCV

import tensorflow as tf


# Seed both NumPy's and TensorFlow's global random generators with the same
# value so that a fresh "Restart & Run All" starts from the same random state.
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Clone the project repository and switch the working directory into it.
# The later `from Models...` imports are presumably resolved relative to this
# directory, so this cell has to run before them.
# NOTE(review): re-running this cell in the same session will try to clone
# into an already existing directory — confirm that is acceptable.
!git clone https://github.com/MLinApp-FP01-Team7-24/OurProject.git
%cd OurProject

In [3]:
# Windowing parameters: both are passed to get_data_windows(), and window_size
# is also passed to the LSTM_VAE constructor.
window_size = 20
k_pa = 1 # values between 0 and 1

In [None]:
from Models.lstm_vae.data import get_data_windows

# get_data_windows returns five arrays: training data (no labels), then
# data/labels for the calibration split and data/labels for the test split.
windows = get_data_windows(window_size, k_pa)
data_train, data_cal, label_cal, data_test, label_test = windows

# Print the shapes, one split per line: train, calibration, test.
for split_arrays in ((data_train,), (data_cal, label_cal), (data_test, label_test)):
  print(*(arr.shape for arr in split_arrays))


In [5]:
# Model dimensions handed to the LSTM_VAE constructor.
# x_dim is the size of axis 2 of the training array
# (presumably the number of features per time step — confirm against get_data_windows).
x_dim = data_train.shape[2]
# presumably the LSTM hidden-state size and the latent-space size — confirm in LSTM_VAE
lstm_h_dim = 10
z_dim = 10

# Training settings: batch size for the tf.data pipeline and number of epochs for fit().
batch_size = 64
epoch_num = 20

In [None]:
from Models.lstm_vae.model import LSTM_VAE


def lr_schedule(epoch):
  """Return the learning rate for `epoch`: 1e-3 multiplied by 0.9 once per epoch."""
  return 1e-3 * 0.9 ** epoch


# Optimizer plus the callback that applies lr_schedule at every epoch.
opt = tf.keras.optimizers.Adam(epsilon=1e-6, amsgrad=True)
scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

# Input pipeline: float32 tensor -> one element per window -> shuffled with a
# 1024-element buffer -> fixed-size batches (the incomplete last batch is dropped).
X_train = tf.convert_to_tensor(data_train, dtype='float32')
train_dataset = tf.data.Dataset.from_tensor_slices(X_train)
train_dataset = train_dataset.shuffle(buffer_size=1024)
train_dataset = train_dataset.batch(batch_size, drop_remainder=True)

# Build, compile and train the model; only the per-epoch history dict is kept.
model = LSTM_VAE(window_size, x_dim, lstm_h_dim, z_dim, dtype='float32')
model.compile(optimizer=opt)
fit_result = model.fit(
  train_dataset,
  epochs=epoch_num,
  shuffle=False,
  callbacks=[scheduler],
)
history = fit_result.history
model.summary()

# Persist the trained weights next to the notebook.
model.save_weights('./lstm_vae_ckpt')

In [None]:
from google.colab import files

# Download the two checkpoint files that go with the './lstm_vae_ckpt' weights
# prefix (only works inside Google Colab).
for ckpt_file in ('./lstm_vae_ckpt.data-00000-of-00001', './lstm_vae_ckpt.index'):
  files.download(ckpt_file)

In [None]:
# Anomaly scores for the calibration windows.
score_cal = model.anomaly_score(data_cal)

# Keep only as many labels as there are score rows.
# (presumably anomaly_score can return fewer rows than it was given — confirm in LSTM_VAE)
n_cal_scored = score_cal.shape[0]
label_cal = label_cal[:n_cal_scored]

In [None]:
# Fit the classifier that maps anomaly scores to calibrated anomaly predictions.
# optimize=True runs a grid search over the SVC hyper-parameters; optimize=False
# fits a single calibrated SVC with fixed C and gamma.
optimize = True
# Grid over the wrapped SVC: C in 1 .. 1e5 and gamma in 1e-5 .. 1, six log-spaced values each.
param_grid = {'estimator__C': np.logspace(0, 5, 6), 'estimator__gamma': np.logspace(-5, 0, 6)}

# Align the labels with the score rows once, so both branches train on exactly
# the same targets (a no-op when label_cal has already been truncated).
labels_cal_aligned = label_cal[:score_cal.shape[0]]

# SVC is created without probability=True on purpose: CalibratedClassifierCV
# calibrates on the estimator's decision_function when it exists, so SVC's own
# internal cross-validated probability fit would only add training time to
# every one of the grid-search fits without affecting the calibrated output.
if optimize:
  cal_search = GridSearchCV(CalibratedClassifierCV(SVC(), cv=3), param_grid, cv=3, verbose=1, scoring='f1')
  cal_search.fit(score_cal, labels_cal_aligned)
  cal_model = cal_search.best_estimator_
  print(cal_search.best_params_)
else:
  # Uses CalibratedClassifierCV's default cv setting (no cv argument is passed here).
  cal_model = CalibratedClassifierCV(SVC(C=1, gamma=0.0001))
  cal_model.fit(score_cal, labels_cal_aligned)

In [None]:
# Anomaly scores for the test windows; labels are cut to the number of score rows.
score_test = model.anomaly_score(data_test)
n_test_scored = score_test.shape[0]
label_test = label_test[:n_test_scored]

# Hard class predictions, and column 1 of the calibrated probabilities as a
# continuous score for the curve-based metrics.
y_pred_test = cal_model.predict(score_test)
proba_test = cal_model.predict_proba(score_test)
y_score_test = proba_test[:, 1]

In [None]:
# F1 score of the hard test predictions against the (truncated) test labels;
# the bare name on the last line displays the value as the cell output.
f1 = sklearn.metrics.f1_score(y_true=label_test, y_pred=y_pred_test)
f1

In [None]:
# ROC curve of the calibrated test scores and the area under it.
fpr, tpr, thresholds = sklearn.metrics.roc_curve(label_test, y_score_test)
roc_auc = sklearn.metrics.auc(fpr, tpr)

# Draw the curve on an explicit Axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='ROC curve (area = %0.4f)' % roc_auc)
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend()
plt.show()

# Display the AUC value as the cell output.
roc_auc

In [None]:
# Precision-recall curve of the calibrated test scores and the area under it.
# NOTE(review): auc() integrates with the trapezoidal rule; scikit-learn's docs
# point out that this linear interpolation can be optimistic for PR curves and
# offer average_precision_score as the non-interpolated summary — consider
# reporting that as well.
precision, recall, thresholds = sklearn.metrics.precision_recall_curve(label_test, y_score_test)
prc_auc = sklearn.metrics.auc(recall, precision)

plt.plot(recall, precision, label='PRC curve (area = %0.4f)' % prc_auc)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision Recall Curve')
plt.legend()
plt.show()

# Display the area value as the cell output.
prc_auc