In [1]:
# !pip3 install pandas
# !pip3 install seaborn
# !pip3 install --upgrade tensorflow-gpu
# !pip3 install import-ipynb
# !pip3 install cuda-python

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import math
import pickle
import time
import sys

import import_ipynb
import AttackDetectionModel

import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.utils import shuffle

from bayes_opt import BayesianOptimization

%matplotlib inline

importing Jupyter notebook from AttackDetectionModel.ipynb


In [2]:
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction = 0.333)
sess = tf.compat.v1.Session(config = tf.compat.v1.ConfigProto(gpu_options = gpu_options))

# Data Pre-Processing

In [3]:
number_features = 35

In [4]:
df_benign, df_attack = AttackDetectionModel.get_files_botiot("../../botiot")

df_benign = np.concatenate([df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, 
                            df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, 
                            df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, 
                            df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign,
                            df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign, df_benign])
df_benign = shuffle(df_benign)

In [5]:
# Normalize with the min-max scaler
scaler = MinMaxScaler()
df_benign_norm = scaler.fit_transform(df_benign)
df_attack_norm = scaler.fit_transform(df_attack)

# Sets

In [6]:
# Train
len_benign_train = int(0.7 * len(df_benign_norm))
X_train = df_benign_norm[:len_benign_train]

In [7]:
# Test
X_test_benign = df_benign_norm[len_benign_train:]
X_test = np.concatenate([X_test_benign, df_attack_norm])

Y_test = np.ones(len(X_test))
Y_test[:len(X_test_benign)] = 0

# Model

In [8]:
# Parameters
original_dim = X_train.shape[1]
input_shape = (original_dim,)
intermediate_dim = int(original_dim / 2)
latent_dim = int(original_dim / 3)

# Initial values
epochs = 5
learning_rate = 0.0001
batch_size = 10
anomaly_threshold = 0.05

# Dictionary
dict_params = { 'learning_rate': learning_rate, 'batch_size': round(batch_size), 'epochs': round(epochs)}
pbounds = { 'learning_rate': (0.000001, 0.001), 'batch_size': (10, 100), 'epochs': (50, 1000)}

In [9]:
# KL Loss function
def vae_loss(x, x_decoded_mean):
    # Compute the average MSE error, then scale it up (sum on all axes)
    
    reconstruction_loss = K.sum(K.square(x - x_decoded_mean))
    
    # Compute the KL loss
    
    kl_loss = - 0.5 * K.sum(1 + z_var - K.square(z_mean) - K.square(K.exp(z_var)), axis=-1)
    
    # Return the average loss over all 
    
    total_loss = K.mean(reconstruction_loss + kl_loss) # Total_loss = reconstruction_loss + kl_loss 
    return total_loss

# (1) Reconstruction Loss - Forces the encoder to generate latent features that minimize the reconstruction error, or else is
# penalized
# (2) KL Loss - Forces the distribution generated by the encoder to be similar to the prior probability of the input vector, 
# pushing latent feature space to normality

In [10]:
# Encoder
inputs, encoder, z_var, z_mean = AttackDetectionModel.vae_encoder(input_shape, 
                                                                  intermediate_dim, 
                                                                  latent_dim, 
                                                                  AttackDetectionModel.reparametrization)
encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 35)]         0           []                               
                                                                                                  
 dense (Dense)                  (None, 17)           612         ['encoder_input[0][0]']          
                                                                                                  
 z_mean (Dense)                 (None, 11)           198         ['dense[0][0]']                  
                                                                                                  
 z_var (Dense)                  (None, 11)           198         ['dense[0][0]']                  
                                                                                            

In [11]:
# Decoder
decoder = AttackDetectionModel.vae_decoder(intermediate_dim, latent_dim, original_dim)
outputs = decoder(encoder(inputs))
decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 z_sampling (InputLayer)     [(None, 11)]              0         
                                                                 
 dense_1 (Dense)             (None, 17)                204       
                                                                 
 dense_2 (Dense)             (None, 35)                630       
                                                                 
Total params: 834
Trainable params: 834
Non-trainable params: 0
_________________________________________________________________


# Hyperparameter Optimization

In [12]:
def maximize_training(X_train = X_train, 
                      X_test = X_test, 
                      Y_test = Y_test, 
                      inputs = inputs, 
                      outputs = outputs, 
                      vae_loss = vae_loss,
                      learning_rate = learning_rate,
                      batch_size = batch_size,
                      epochs = epochs,
                      anomaly_threshold = anomaly_threshold):    
    
    # Create model
    opt_adam = optimizers.Adam(learning_rate = dict_params['learning_rate'], clipvalue = 0.5)
    model = Model(inputs, outputs, name = 'vae_mlp')
    model.compile(optimizer = opt_adam, loss = vae_loss)

    # Train
    history = model.fit(X_train, 
                        X_train, 
                        shuffle = True, 
                        verbose = 0,
                        epochs = dict_params['epochs'], 
                        batch_size = dict_params['batch_size'])
    
    # Maximize the f1-score
    X_pred_opt = model.predict(X_test)
    error_vector_opt = AttackDetectionModel.get_error_term(X_pred_opt, X_test, _rmse = False)
    Y_pred_opt = (error_vector_opt > anomaly_threshold)
    f1 = f1_score(Y_test, Y_pred_opt)
    
    return f1

In [13]:
'''
# Apply Bayesian optimization to choose the best hyperparameters

opt = BayesianOptimization(f = maximize_training,
                           pbounds = pbounds,
                           verbose = 2, 
                           random_state = 1)

opt_start = time.time()

opt.maximize(init_points = 5, n_iter = 5)

opt_end = time.time()
opt_time = opt_end - opt_start
print("Optimization time:", opt_time)

learning_rate = opt.max['params']['learning_rate']
epochs = round(opt.max['params']['epochs'])
batch_size = round(opt.max['params']['batch_size'])
'''

'\n# Apply Bayesian optimization to choose the best hyperparameters\n\nopt = BayesianOptimization(f = maximize_training,\n                           pbounds = pbounds,\n                           verbose = 2, \n                           random_state = 1)\n\nopt_start = time.time()\n\nopt.maximize(init_points = 5, n_iter = 5)\n\nopt_end = time.time()\nopt_time = opt_end - opt_start\nprint("Optimization time:", opt_time)\n\nlearning_rate = opt.max[\'params\'][\'learning_rate\']\nepochs = round(opt.max[\'params\'][\'epochs\'])\nbatch_size = round(opt.max[\'params\'][\'batch_size\'])\n'

# Predict Attacks

In [14]:
# Training step with the best hyperparameters
learning_rate = 0.0005007978127917845
epochs = 550
batch_size = 19
opt_time = 1151.2620151042938
anomaly_threshold = 0.043

In [15]:
for i in range(1):
    print("Iteration " + str(i))
    
    # Train
    vae_model, train_time = AttackDetectionModel.train(X_train, inputs, outputs, vae_loss, learning_rate, epochs, batch_size)
    
    # Set the optimized anomaly threshold
    #anomaly_threshold = AttackDetectionModel.get_anomaly_threshold(X_train, vae_model)
    
    # Test
    X_pred, test_time = AttackDetectionModel.test(X_test, vae_model)
    Y_test, Y_pred = AttackDetectionModel.get_prediction(Y_test, X_pred, X_test, anomaly_threshold, vae_model)
    
    # Metrics
    acc, f1, pre, rec = AttackDetectionModel.get_scores(Y_test, Y_pred)
    
    # Print results
    AttackDetectionModel.print_results(number_features,
                                       learning_rate,
                                       epochs,
                                       batch_size,
                                       anomaly_threshold,
                                       X_train,
                                       X_test,
                                       opt_time,
                                       train_time,
                                       test_time,
                                       acc,
                                       f1,
                                       pre,
                                       rec,
                                       Y_test,
                                       Y_pred,
                                       "Results/botiot.txt")
    
    print("")

Iteration 0
Train on 15025 samples
Epoch 1/550
Epoch 2/550
Epoch 3/550
Epoch 4/550
Epoch 5/550
Epoch 6/550
Epoch 7/550
Epoch 8/550
Epoch 9/550
Epoch 10/550
Epoch 11/550
Epoch 12/550
Epoch 13/550
Epoch 14/550
Epoch 15/550
Epoch 16/550
Epoch 17/550
Epoch 18/550
Epoch 19/550
Epoch 20/550
Epoch 21/550
Epoch 22/550
Epoch 23/550
Epoch 24/550
Epoch 25/550
Epoch 26/550
Epoch 27/550
Epoch 28/550
Epoch 29/550
Epoch 30/550
Epoch 31/550
Epoch 32/550
Epoch 33/550
Epoch 34/550
Epoch 35/550
Epoch 36/550
Epoch 37/550
Epoch 38/550
Epoch 39/550
Epoch 40/550
Epoch 41/550
Epoch 42/550
Epoch 43/550
Epoch 44/550
Epoch 45/550
Epoch 46/550
Epoch 47/550
Epoch 48/550
Epoch 49/550
Epoch 50/550
Epoch 51/550
Epoch 52/550
Epoch 53/550
Epoch 54/550
Epoch 55/550
Epoch 56/550
Epoch 57/550
   19/15025 [..............................] - ETA: 1s - loss: 4.9362

KeyboardInterrupt: 

# Metrics

In [None]:
# Examinig the latent space generated by the encoder
X_encoded = encoder.predict(X_test)

pca = PCA(n_components=2)
X_analysis = pca.fit_transform(X_encoded)

In [None]:
plt.figure(figsize=(8, 8))
sns.scatterplot(x = X_analysis[:, 0], y = X_analysis[:, 1], s = 20, hue = Y_pred)
plt.grid()
plt.savefig("Results/PCA/Botiot")
plt.show()

# Orange ones are anomalies

In [None]:
plt.figure(figsize = (8, 8))
sns.scatterplot(x = X_analysis[:, 0], y = X_analysis[:, 1], s = 20, hue = Y_test)
plt.grid()
plt.savefig("Results/PCA Groundtruth/Botiot")
plt.show()

# Ground truth

In [None]:
conf_matrix = confusion_matrix(Y_test, Y_pred)

fig, ax = plt.subplots(figsize = (5, 5))
ax.matshow(conf_matrix, cmap = plt.cm.Blues, alpha = 0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x = j, y = i,s = conf_matrix[i, j], va = 'center', ha = 'center', size = 'large')
 
plt.xlabel('Predictions', fontsize=12)
plt.ylabel('Actuals', fontsize=12)
plt.title('Confusion Matrix', fontsize=14)
plt.savefig("Results/Confusion Matrix/Botiot")
plt.show()