# Sandbox environment for testing WGAN-GP on timeseries data

## Import libraries and configure Jupyter Notebook environment 

In [None]:
# Configure the notebook session through the in-house `DataAnalyst` package.
# The wildcard import below is commented out; explicit imports keep it clear
# where each name used in this notebook comes from.
#from DataAnalyst.utils.libraries import * #load all packages into JN
from DataAnalyst import IPythonConfig
# NOTE(review): the four helpers below are opaque from this notebook. Judging by
# their names they configure jedi completion, the working directory, autoreload
# and library options -- confirm against the DataAnalyst sources.
IPythonConfig.config_jedi()
# Presumably the call that emits the "Working directory: ..." line in the cell
# output; the in-house `src.*` imports further down likely depend on it -- verify.
IPythonConfig.set_cwd()
IPythonConfig.set_autoreload()
IPythonConfig.config_libs()

# Tensorflow
import tensorflow as tf 
import tensorflow.keras as tfk

# Sklearn
from sklearn.model_selection import train_test_split

Working directory: C:\Users\ilias\Desktop\Stress Testing with GANs-Gdrive\Stress Testing with GANs\IRRBB

In [2]:
# Import in-house modules
# -------------------------
from src.mdl import GANBase
from src.utils.nnvis import GANvis
from src.utils.toydata_generator import dataGenerator
from src.utils.parser import Parser

# Test out Generator

## Generate the "real" dataset

The real dataset is a bivariate `Gaussian` dataset with samples being drawn from:
$$
\mathcal{N}\left(\mu = \begin{bmatrix} 2 \\ 2 \end{bmatrix},\ \Sigma = \begin{bmatrix}
                    1 & 0.7 \\
                    0.7 & 1
                    \end{bmatrix}\right)
$$

In [31]:
# Build the "real" dataset Pr by sampling from a multivariate Gaussian
n = 1000  # presumably the number of rows to draw
v = 2     # presumably the number of features (the output shows feature_1 and feature_2)
distribution = "multivariate Gaussian"
mu = [2, 2]  # forwarded as `loc`
# Forwarded as `scale`. NOTE(review): despite the name `std` this is a 2x2 matrix
# with 0.7 off-diagonals, so it is presumably used as a covariance matrix rather
# than as standard deviations -- confirm in dataGenerator.generate_data.
std = [[1, 0.7], [0.7, 1]]
Pr = dataGenerator.generate_data(n, v, distribution, loc=mu, scale=std)
# Preview the first rows
Pr.head()

Unnamed: 0,feature_1,feature_2
0,2.94,2.18
1,2.57,2.26
2,1.7,1.19
3,1.19,0.63
4,3.34,3.4


## Drawing from our untrained `Generator` 

Let's now initialize our `GAN` model and draw samples from the untrained `Generator` and compare the samples with the ones coming from the real dataset.

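We see that the original dataset has an elliptical shape, which is what we expect from the positive correlation (0.7) between the two features.

**TODO**: describe how the samples drawn from the untrained `Generator` compare with the real ones.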

In [32]:
# Build the base GAN around the real dataset, then create both networks.
# The Discriminator gets a sigmoid output so it can be fitted with binary
# cross-entropy in the "Test out Discriminator" section.
gan = GANBase(Pr)
gan.init_discriminator(output_activation=tfk.activations.sigmoid)
gan.init_generator()
# Draw 1000 rows from each source as DataFrames: real ('Pr') first, then generated ('Pg')
Pr_sampled, Pg_sampled = (
    gan.generate_data(source, n=1000, as_df=True) for source in ('Pr', 'Pg')
)
# Scatter plot of the real sample against the generated one
GANvis.plot_scatter(data=(Pr_sampled, Pg_sampled)).show(config=GANvis.config)

# Test out Discriminator

In [33]:
# Parse the real and generated samples into an (X, y) tuple
X, y= Parser.to_ml_tuple(data= (Pr_sampled, Pg_sampled))
# Compute the decision boundary of the Discriminator (not yet fitted at this
# point of the notebook) and keep the figure object in `fig`.
# The figure is rendered in a separate statement: chaining `.show(...)` onto the
# assignment stored show()'s return value in `fig` instead of the figure
# (assuming this is a Plotly figure, show() returns None -- confirm in GANvis).
fig = GANvis.plot_decision_boundary(data= (X, y) , mdl= gan.D)
fig.show(config= GANvis.config)

In [34]:
# Train the Discriminator for a few epochs on the real-vs-generated task
# Create train and test split (fixed random_state so the split is reproducible)
xtrain, xtest, ytrain, ytest= train_test_split(X, y, test_size= 0.25, random_state=0)
# Train Discriminator
# The metric is BinaryAccuracy rather than the string 'Accuracy': that string
# resolves to keras.metrics.Accuracy, which counts predictions that are exactly
# equal to the labels. With a sigmoid output the predictions are floats in
# (0, 1), so that metric is meaningless; BinaryAccuracy thresholds them at 0.5
# first. The metric keeps the name 'accuracy' so the log/history key is unchanged.
gan.D.compile(
    loss='BinaryCrossentropy',
    metrics= [tfk.metrics.BinaryAccuracy(name='accuracy')],
)
gan.D.fit(x= xtrain, y= ytrain, epochs= 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2552eb57c40>

# Test out WGAN-GP 

In [36]:
from src.model.wgan_gp import WGANGP

In [41]:
# Wrap the existing Discriminator and Generator in the WGAN-GP model.
# NOTE(review): gan.D was built with a sigmoid output and is fitted with binary
# cross-entropy in an earlier cell; a WGAN-GP critic normally has an unbounded
# (linear) output -- confirm that reusing it here is intended.
wgan = WGANGP(D= gan.D, G= gan.G)

In [49]:
# Sanity check: confirm the sampled data is a pandas DataFrame before it is
# sliced with .iloc in the gradient-penalty cell
type(Pr_sampled)

pandas.core.frame.DataFrame

In [93]:
# Evaluate get_GP (presumably the gradient-penalty term) on the first 10 real
# and the first 10 generated rows, and convert the resulting tensor to NumPy
wgan.get_GP(Pr_sampled.head(10), Pg_sampled.head(10)).numpy()

0.9008168

In [84]:
# Print the Discriminator's layers, output shapes and parameter counts
gan.D.summary()

Model: "Discriminator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 2)]               0         
                                                                 
 hidden_layer_1 (Dense)      (None, 64)                192       
                                                                 
 hidden_layer_2 (Dense)      (None, 32)                2080      
                                                                 
 hidden_layer_3 (Dense)      (None, 16)                528       
                                                                 
 output_layer (Dense)        (None, 1)                 17        
                                                                 
Total params: 2,817
Trainable params: 2,817
Non-trainable params: 0
_________________________________________________________________
