# <center> Vanilla Generative Adversarial Network for NSL-KDD </center>

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from preprocessing import *
from classifiers import *
from utils import *

from matplotlib import pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K


Using TensorFlow backend.


## Read Data & Scale Features

In [2]:
# Load the train/test frames; label_mapping is indexed by class name further down
# (e.g. label_mapping["probe"]).
train, test, label_mapping = get_data(encoding="Label")

# Feature columns are every column except the target column.
data_cols = [col for col in train.columns if col != 'label']
x_train, x_test = preprocess(train, test, data_cols, "Robust", True)

# Targets as plain arrays.
y_train = x_train['label'].values
y_test = x_test['label'].values

# Rebuild the feature list from the preprocessed frame
# (presumably because preprocess may alter the column set -- TODO confirm).
data_cols = [col for col in x_train.columns if col != 'label']

## Define Generator, Discriminator & Full Generative Adversarial Network

In [3]:
def create_discriminator(data_dim, min_num_neurones):
    """
    Build and compile the discriminator network.

    Parameters:
    -----------
    data_dim : int
        Number of input features of one sample
    min_num_neurones : int
        Base layer width; the hidden layers use 2x and 1x this value

    Return Value: compiled Sequential model named 'Discriminator' with a single
    sigmoid output unit, binary cross-entropy loss and the "sgd" optimizer
    """
    hidden_and_output = [
        Dense(min_num_neurones*2, activation='relu', input_dim=data_dim),
        Dense(min_num_neurones, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    discriminator = tf.keras.models.Sequential(hidden_and_output, name='Discriminator')
    discriminator.compile(loss='binary_crossentropy', optimizer="sgd")
    return discriminator


In [4]:
def create_generator(data_dim, min_num_neurones,noise_dim):
    """
    Build and compile the generator network.

    Parameters:
    -----------
    data_dim : int
        Number of features of one generated sample (width of the output layer)
    min_num_neurones : int
        Base layer width; the hidden layers use 1x, 2x and 4x this value
    noise_dim : int
        Width of the input noise vector

    Return Value: compiled Sequential model named 'Generator' whose last layer
    is a Dense layer of data_dim units with no activation
    """
    stack = [
        Dense(min_num_neurones, activation='relu', input_dim=noise_dim),
        Dense(min_num_neurones*2, activation='relu'),
        Dense(min_num_neurones*4, activation='tanh'),
        # Linear output layer: one unit per data feature.
        Dense(data_dim),
    ]
    generator = tf.keras.models.Sequential(stack, name='Generator')
    generator.compile(loss='binary_crossentropy', optimizer="sgd")
    return generator

In [5]:
def create_gan(discriminator, generator, z_dim):
    """
    Chain generator and discriminator into one compiled model.

    Parameters:
    -----------
    discriminator : keras model
        Model applied to the generator output; its `trainable` flag is set
        to False by this function
    generator : keras model
        Model applied to the noise input
    z_dim : int
        Width of the noise input

    Return Value: compiled Model mapping noise -> discriminator(generator(noise)),
    using binary cross-entropy loss and the 'sgd' optimizer
    """
    # Freeze the discriminator before the stacked model is compiled.
    discriminator.trainable = False

    latent = Input(shape=(z_dim,))
    verdict = discriminator(generator(latent))

    stacked = Model(inputs=latent, outputs=verdict)
    stacked.compile(loss='binary_crossentropy', optimizer='sgd')
    return stacked

## Define batch generation & GAN training 

In [6]:
def get_batch(X, batch_size=1):
    """
    Draw a random batch of rows from X without repetition.

    Parameters:
    -----------
    X : ndarray
        The input data to sample the batch from
    batch_size : int (default = 1)
        Number of rows to draw

    Return Value: ndarray - batch_size rows of X, picked at distinct random positions
    """
    n_rows = len(X)
    # replace=False: every position is picked at most once per batch.
    picked = np.random.choice(n_rows, size=batch_size, replace=False)
    return X[picked]

In [7]:
def training(arguments,X):
    """
    Train a vanilla GAN on X and return the models together with their loss histories.

    Parameters:
    -----------
    arguments : list
        [rand_noise_dim, nb_steps, batch_size, D_epochs, G_epochs, min_num_neurones]:
        noise vector width, number of outer training steps, mini-batch size,
        discriminator updates per step, generator updates per step and the base
        layer width handed to create_generator / create_discriminator
    X : ndarray
        Real samples, one row per sample; X.shape[1] is used as the data dimension

    Return Value: dict with the keys "generator_model", "discriminator_model",
    "combined_model", "generator_loss", "disc_loss_generated" and "disc_loss_real".
    Each loss list gets one entry per outer step (the loss of the last inner
    update of that step).
    """
    rand_noise_dim, nb_steps, batch_size, D_epochs, G_epochs, min_num_neurones = arguments

    data_dim = X.shape[1]
    combined_loss, disc_loss_generated, disc_loss_real = [], [], []

    # Creating GAN
    generator = create_generator(data_dim, min_num_neurones, rand_noise_dim)
    discriminator = create_discriminator(data_dim, min_num_neurones)
    adversarial_model = create_gan(discriminator, generator, rand_noise_dim)

    # Start training
    for epoch in range(1, nb_steps + 1):
        K.set_learning_phase(1)

        # Train Discriminator
        discriminator.trainable = True
        for i in range(D_epochs):
            # NOTE(review): the global NumPy RNG is reseeded with i + epoch before
            # every inner update, so generator update i of a step draws the same
            # noise as discriminator update i of that step -- confirm this is intended.
            np.random.seed(i + epoch)

            # Use the noise width passed in `arguments`, not a notebook-level global.
            noise = np.random.normal(0, 1, size=(batch_size, rand_noise_dim))
            generated_samples = generator.predict(noise)
            real_samples = get_batch(X, batch_size)

            # Targets are drawn just below 1 for real and just above 0 for generated samples.
            d_l_r = discriminator.train_on_batch(real_samples, np.random.uniform(low=0.999, high=1.0, size=batch_size))
            d_l_g = discriminator.train_on_batch(generated_samples, np.random.uniform(low=0.0, high=0.0001, size=batch_size))

        # Freeze Discriminator
        discriminator.trainable = False
        disc_loss_generated.append(d_l_g)
        disc_loss_real.append(d_l_r)

        # Train Generator through the stacked model, with "real" targets
        for i in range(G_epochs):
            np.random.seed(i + epoch)

            noise = np.random.normal(0, 1, size=(batch_size, rand_noise_dim))
            loss = adversarial_model.train_on_batch(noise, np.random.uniform(low=0.999, high=1.0, size=batch_size))

        combined_loss.append(loss)

        # Do checkpointing every 10th step
        if epoch % 10 == 0:
            K.set_learning_phase(0)
            test_size = len(X)

            # NOTE(review): checkpoint noise is drawn from N(3, 2) while the training
            # noise above is N(0, 1) -- confirm this shift is deliberate.
            z = np.random.normal(3, 2, size=(test_size, rand_noise_dim))

            fake_pred = np.array(adversarial_model.predict(z)).ravel()
            real_pred = np.array(discriminator.predict(X)).ravel()

            modelAccuracy(fake_pred, real_pred)

    return {
        "generator_model": generator,
        "discriminator_model": discriminator,
        "combined_model": adversarial_model,
        "generator_loss": combined_loss,
        "disc_loss_generated": disc_loss_generated,
        "disc_loss_real": disc_loss_real,
    }
        

## Filter Train samples and set training parameters

In [13]:
K.clear_session()

# --- Real samples the GAN is trained on ---------------------------------
# Row positions of the training samples labelled with the "probe" class.
att_ind = np.nonzero(y_train == label_mapping["probe"])[0]
x = x_train[data_cols].values[att_ind]
n_to_generate = 2000

# --- Network sizes -------------------------------------------------------
rand_dim = 32          # width of the generator's noise input
base_n_count = 100     # base layer width handed to the model builders

# --- Training schedule ---------------------------------------------------
combined_ep = 1000                 # outer training steps
batch_size = min(128, len(x))      # never ask for more rows than are available

ep_d = 1   # discriminator updates per step
ep_g = 2   # generator updates per step

# NOTE(review): learning_rate is not passed to any optimizer in the cells shown
# here (the models are compiled with the "sgd" string). Alternative value noted
# by the author: 5e-5.
learning_rate = 0.0001

## Training GAN

In [14]:
# Positional order must match the unpacking at the top of training().
arguments = [
    rand_dim,       # rand_noise_dim
    combined_ep,    # nb_steps
    batch_size,     # batch_size
    ep_d,           # D_epochs
    ep_g,           # G_epochs
    base_n_count,   # min_num_neurones
]
res = training(arguments, x)

Discriminator accuracy on Fake : 0.04855875831485588, Real : 0.9216612655637045
Discriminator accuracy on Fake : 0.0961794303257718, Real : 0.9250554323725055
Discriminator accuracy on Fake : 0.8928534879754392, Real : 0.922667576326113
Discriminator accuracy on Fake : 0.8955824663141736, Real : 0.9191881289442265
Discriminator accuracy on Fake : 0.22681221217806583, Real : 0.904929217124339
Discriminator accuracy on Fake : 0.0016714992324748422, Real : 0.8680027289783387
Discriminator accuracy on Fake : 0.0003922906361930752, Real : 0.65650690772642
Discriminator accuracy on Fake : 0.1783728466655296, Real : 0.5787651373017226
Discriminator accuracy on Fake : 0.7988060719768036, Real : 0.4528739553129797
Discriminator accuracy on Fake : 0.999420092103019, Real : 0.0863721644209449
Discriminator accuracy on Fake : 1.0, Real : 0.0
Discriminator accuracy on Fake : 1.0, Real : 0.0
Discriminator accuracy on Fake : 1.0, Real : 0.0
Discriminator accuracy on Fake : 1.0, Real : 0.3308374552276

In [15]:
# Print the layer-by-layer Keras summary of the discriminator returned by training().
# NOTE(review): the recorded output reports "Total params" as twice the sum of the
# listed layers -- presumably because `trainable` is flipped on the discriminator
# after the models were compiled; confirm.
res["discriminator_model"].summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 200)               7000      
_________________________________________________________________
dense_5 (Dense)              (None, 100)               20100     
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 101       
Total params: 54,402
Trainable params: 27,201
Non-trainable params: 27,201
_________________________________________________________________
