In [52]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt


In [99]:
import NSL_KDDdata

x_train , y_train= NSL_KDDdata.train_data()
x_test , y_test = NSL_KDDdata.test_data()

In [100]:
def original_dimension(x_train):
    # print(x_train.shape[1])
    return(x_train.shape[1])

In [101]:
def train_test_shape(x_train, x_test):
    return (x_train.shape, x_test.shape)

In [None]:
print("original dimension of training set: ",train_test_shape(x_train, x_test)[0])
print("original dimension of testing set:  ",train_test_shape(x_train, x_test)[1])
print("original dimension of a data point: ",train_test_shape(x_train, x_test)[1][1:])

In [132]:
#sampling
def sampling(args, latent_dim = 8):
    z_mean, z_log_var = args
    epsilon = tf.keras.backend.random_normal(shape=(tf.shape(z_mean)[0], latent_dim))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

In [133]:
def VAE(latent_dim , x_train, epochs, original_dim):
    # Encoder
    encoder_inputs = keras.Input(shape=(original_dim,))
    x = layers.Dense(128, activation="relu")(encoder_inputs)
    z_mean = layers.Dense(latent_dim)(x)
    z_log_var = layers.Dense(latent_dim)(x)

    # Sampling layer - samples mean and log variance of the distribution
    z = layers.Lambda(sampling)([z_mean, z_log_var], latent_dim)

    # Decoder
    decoder_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(128, activation="relu")(decoder_inputs)
    outputs = layers.Dense(original_dim, activation="sigmoid")(x)

    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
    decoder = keras.Model(decoder_inputs, outputs, name="decoder")

    # Define the VAE model
    outputs = decoder(encoder(encoder_inputs)[2])
    vae = keras.Model(encoder_inputs, outputs, name="vae")
    

    # Reconstruction loss
    reconstruction_loss = keras.losses.binary_crossentropy(encoder_inputs, outputs)
    reconstruction_loss *= original_dim

    # KL loss
    kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
    kl_loss = tf.reduce_sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = tf.reduce_mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)


    vae.compile(optimizer='adam')
    vae.fit(x_train, epochs = epochs, batch_size=64)

    return encoder, decoder

In [134]:
x_test.shape


(22544, 40)

In [135]:
num_epohs = 20
encoder, decoder = VAE(8 , x_train, num_epohs, original_dimension(x_train))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [136]:
compressed_data, b, c = encoder.predict(x_test)
reconstructed_data = decoder.predict(compressed_data)



In [137]:
compressed_data.shape

(22544, 8)

In [138]:
import random

n = 1

for i in range(n):
    a = random.randint(0, 22544)
    print("Actual: \n", x_test[a], "\nPredicted: \n", reconstructed_data[a], "\nReduced Dimension: \n", compressed_data[a])

Actual: 
 [0.000e+00 1.000e+00 2.050e+02 2.845e+03 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 2.000e+00
 2.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e+00 0.000e+00
 0.000e+00 3.000e+01 2.550e+02 1.000e+00 0.000e+00 3.000e-02 3.000e-02
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 2.100e+01] 
Predicted: 
 [nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan] 
Reduced Dimension: 
 [nan nan nan nan nan nan nan nan]


In [143]:
compressed_train , _, _ = encoder.predict(x_train)
compressed_test , _, _ = encoder.predict(x_test)



In [144]:
import pandas as pd
df = pd.DataFrame(compressed_train)
df.to_csv('data/compressed_data_train.csv')

df2 = pd.DataFrame(compressed_test)
df2.to_csv('data/compressed_data_test.csv')
