# <center> Vanilla Generative Adversarial Network for NSL-KDD </center>

In [89]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import preprocessing
from classifiers import *
from utils import *
from matplotlib import pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from scipy.stats import norm
from tensorflow.keras.losses import kullback_leibler_divergence
# K.clear_session()

## Read Data & Standard Scale

In [90]:
# Load both splits together with the label mapping (label_mapping is later
# indexed by label name, e.g. label_mapping["normal"]).
# NOTE(review): encoding="Label" presumably selects label encoding of the
# categorical columns inside preprocessing.get_data - confirm there.
train, test, label_mapping = preprocessing.get_data(encoding="Label")

# Split each frame into its feature columns and its target vector.
x_train = train.drop("label", axis=1)
y_train = train.label.values
x_test = test.drop("label", axis=1)
y_test = test.label.values

# Standardise the features. The scaler is fitted on the training split only
# and the same statistics are then applied to both splits.
Scaler = StandardScaler().fit(x_train)
x_train = Scaler.transform(x_train)
x_test = Scaler.transform(x_test)

## Define Generator, Discriminator & Full Generative Adversarial Network

In [5]:
def create_discriminator(data_dim, min_num_neurones):
    """
    Build and compile the discriminator network.

    Parameters:
    -----------
    data_dim : int
        Number of features in each input sample
    min_num_neurones : int
        Base layer width; the first hidden layer has twice this many units,
        the second has exactly this many

    Return Value: compiled Sequential model named 'Discriminator' ending in a
    single sigmoid unit, using binary cross-entropy loss and the "sgd" optimizer
    """
    hidden_and_output = [
        Dense(min_num_neurones*2, activation='relu', input_dim=data_dim),
        Dense(min_num_neurones, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]

    discriminator = tf.keras.models.Sequential(hidden_and_output, name='Discriminator')
    discriminator.compile(loss='binary_crossentropy', optimizer="sgd")

    return discriminator

# d = create_discriminator(3,3)
# d.summary()

In [6]:
def create_generator(data_dim, min_num_neurones,noise_dim):
    """
    Build and compile the generator network.

    Parameters:
    -----------
    data_dim : int
        Number of features in each generated sample (width of the output layer)
    min_num_neurones : int
        Base layer width; the hidden layers have 1x, 2x and 4x this many units
    noise_dim : int
        Length of the noise vector the generator takes as input

    Return Value: compiled Sequential model named 'Generator' whose last layer
    is a linear Dense layer of width data_dim
    """
    stack = [
        Dense(min_num_neurones, activation='relu', input_dim=noise_dim),
        Dense(min_num_neurones*2, activation='relu'),
        Dropout(0.2),
        Dense(min_num_neurones*4, activation='tanh'),
        # No activation on the output layer: generated features are unbounded.
        Dense(data_dim),
    ]

    generator = tf.keras.models.Sequential(stack, name='Generator')
    generator.compile(loss='binary_crossentropy', optimizer="sgd")

    return generator
# g = create_generator(3,3,3)
# g.summary()

In [None]:
def create_gan(discriminator, generator, z_dim):
    """
    Stack the generator and the discriminator into one trainable model.

    Parameters:
    -----------
    discriminator : keras model
        Model scoring samples; its `trainable` flag is set to False here,
        on the object that was passed in
    generator : keras model
        Model turning noise vectors into samples
    z_dim : int
        Length of the noise vector fed to the generator

    Return Value: compiled Model mapping noise -> discriminator(generator(noise)),
    using binary cross-entropy loss and the 'sgd' optimizer
    """
    # Freeze the discriminator before the stacked model is compiled.
    discriminator.trainable = False

    noise_in = Input(shape=(z_dim,))
    generated = generator(noise_in)
    verdict = discriminator(generated)

    stacked = Model(inputs=noise_in, outputs=verdict)
    stacked.compile(loss='binary_crossentropy', optimizer='sgd')
    return stacked
# gan = create_gan(d,g,3)
# gan.summary()

## Define batch generation & GAN training 

In [8]:
def get_batch(X, batch_size=1):
    """
    Draw a random mini-batch of rows from X, without replacement.

    Parameters:
    -----------
    X : array-like
        The input data to sample from; rows are taken along the first axis.
        Anything np.asanyarray accepts (ndarray, nested list, DataFrame, ...)
    batch_size : int (default = 1)
        Number of rows to draw

    Return Value: ndarray - batch_size distinct rows of X picked at random

    Raises: ValueError when batch_size is larger than len(X), because rows are
    drawn without replacement
    """
    # Convert once so integer-array indexing works for any array-like input;
    # ndarrays (and their subclasses) pass through unchanged.
    data = np.asanyarray(X)
    batch_ix = np.random.choice(len(data), batch_size, replace=False)
    return data[batch_ix]

In [9]:
def training(arguments,X):
    """
    Train a vanilla GAN on X and return the models with their loss histories.

    Parameters:
    -----------
    arguments : sequence of six values, in this order
        rand_noise_dim   - length of the noise vector fed to the generator
        nb_steps         - number of outer training steps
        batch_size       - number of samples per training batch
        D_epochs         - discriminator updates per outer step
        G_epochs         - generator updates per outer step
        min_num_neurones - base layer width handed to the model builders
    X : ndarray
        Real samples, one per row

    Return Value: dict with the keys "generator_model", "discriminator_model",
    "combined_model", "generator_loss", "disc_loss_generated" and
    "disc_loss_real"; each loss list gets one entry per outer step (the loss of
    the last inner update of that step)

    Raises: ValueError when training is requested (nb_steps >= 1) with
    D_epochs or G_epochs below 1
    """
    [rand_noise_dim, nb_steps, batch_size, D_epochs, G_epochs, min_num_neurones] = arguments

    # Each outer step records the loss of its last inner update, so both inner
    # loops must run at least once; fail early with a clear message otherwise.
    if nb_steps >= 1 and min(D_epochs, G_epochs) < 1:
        raise ValueError("D_epochs and G_epochs must both be >= 1, got {} and {}".format(D_epochs, G_epochs))

    data_dim = X.shape[1]
    combined_loss, disc_loss_generated, disc_loss_real = [], [], []

    # Creating GAN
    generator = create_generator(data_dim,min_num_neurones,rand_noise_dim)
    discriminator = create_discriminator(data_dim,min_num_neurones)
    adversarial_model = create_gan(discriminator, generator,rand_noise_dim)

    #Start training
    for epoch in range(1,nb_steps + 1 ):
        K.set_learning_phase(1)

        #Train Discriminator
        discriminator.trainable = True
        for i in range(D_epochs):
            # NOTE(review): reseeding on every inner iteration makes this loop and
            # the generator loop below draw from the same seeds (i+epoch) - confirm
            # this overlap is intended.
            np.random.seed(i+epoch)

            # Noise width comes from the unpacked argument, not from a notebook global.
            noise = np.random.normal(0,1, size=(batch_size, rand_noise_dim))
            generated_samples = generator.predict(noise)
            real_samples = get_batch(X,batch_size)

            # Soft targets: real samples are labelled in [0.999, 1.0), generated ones in [0.0, 0.0001).
            d_l_r = discriminator.train_on_batch(real_samples, np.random.uniform(low=0.999, high=1.0, size=batch_size))
            d_l_g = discriminator.train_on_batch(generated_samples, np.random.uniform(low=0.0, high=0.0001, size=batch_size))

        #Freeze Discriminator
        print("D lr : {}".format(discriminator.optimizer.get_config()))
        discriminator.trainable = False
        disc_loss_generated.append(d_l_g)
        disc_loss_real.append(d_l_r)

        #Train Generator
        for i in range(G_epochs):
            np.random.seed(i+epoch)

            noise = np.random.normal(0,1, size=(batch_size, rand_noise_dim))
            # The generator is updated through the stacked model, with "real" targets.
            loss = adversarial_model.train_on_batch(noise, np.random.uniform(low=0.999, high=1.0, size=batch_size))
            # Report the stacked model's own optimizer, which is the one used for this update.
            print("GAN lr : {}".format(adversarial_model.optimizer.get_config()))
        combined_loss.append(loss)

        #Do checkpointing
        if epoch % 10 == 0:
            K.set_learning_phase(0)
            test_size = len(X)

            # NOTE(review): evaluation noise is drawn from N(3, 2) while the training
            # noise above is N(0, 1) - confirm this mismatch is intended.
            z = np.random.normal(3,2,size=(test_size, rand_noise_dim))

            # Discriminator scores for generated samples (via the stacked model) and for the real data.
            fake_pred = np.array(adversarial_model.predict(z)).ravel()
            real_pred = np.array(discriminator.predict(X)).ravel()

            # modelAccuracy is not defined in this notebook - presumably provided by `utils`.
            modelAccuracy(fake_pred,real_pred)

    return {"generator_model":generator,"discriminator_model":discriminator,
            "combined_model":adversarial_model,"generator_loss":combined_loss,
            "disc_loss_generated":disc_loss_generated,"disc_loss_real": disc_loss_real}
        

## Filter Train samples and set training parameters

In [10]:
#Generative Adversarial Networks
# Row positions of every training record whose label differs from "normal";
# only those rows of the scaled training matrix are used to train the GAN.
att_ind = np.where(train.label != label_mapping["normal"])[0]
x = x_train[att_ind]

n_to_generate = 2000

# Model sizing: noise vector length and base hidden-layer width.
rand_dim, base_n_count = 32, 100

# Schedule: outer steps, then discriminator / generator updates per step.
combined_ep = 100
ep_d, ep_g = 1, 2

# Cap the batch at 128 rows, or at the whole subset when it is smaller.
batch_size = min(128, len(x))

# NOTE(review): not passed to any optimizer in the cells shown here (the models
# are compiled with the optimizer name "sgd") - confirm whether it should be.
learning_rate = 0.0001#5e-5

## Training GAN

In [11]:
# Pack the hyper-parameters in the positional order training() unpacks them:
# noise dim, outer steps, batch size, discriminator updates per step,
# generator updates per step, base layer width.
arguments = [rand_dim, combined_ep, batch_size, ep_d,ep_g, base_n_count]
# res is the dict returned by training(): the three models plus the loss histories.
res = training(arguments,x)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Discriminator accuracy on Fake : 0.5300699300699301, Real : 0.906634828586048
Discriminator accuracy on Fake : 0.029046563192904655, Real : 0.9469895957700836
Discriminator accuracy on Fake : 0.0005457956677468873, Real : 0.9595258400136449
Discriminator accuracy on Fake : 1.7056114617090227e-05, Real : 0.9671158110182501
Discriminator accuracy on Fake : 0.0, Real : 0.9674398771959748
Discriminator accuracy on Fake : 5.116834385127068e-05, Real : 0.9673375405082723
Discriminator accuracy on Fake : 0.0006993006993006993, Real : 0.9685314685314685
Discriminator accuracy on Fake : 0.0008528057308545113, Real : 0.9743134913866621
Discriminator accuracy on Fake : 0.00040934675081016544, Real : 0.9740747057820228
Discriminator accuracy on Fake : 0.0, Real : 0.9

KeyboardInterrupt: 

In [None]:
# Print the layer summary of the stacked generator -> discriminator model returned by training().
res["combined_model"].summary()