# Script: GAN_GENERATOR
<h4><span style="color:blue">Juan José Hoyos Urcué</span></h4>

### Loading the dataset and Python libraries

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam

# Load the dataset from the Excel workbook into a DataFrame.
df = pd.read_excel("../../1-without_data_augmentation/preprocesado.xlsx")

In [None]:
# Label value whose rows are used to fit the GAN; change it to generate
# samples for the other outcome (cure or fail).
target_class = 0
# Keep only the rows of the chosen class, then discard the label column so
# that only the remaining columns are modelled.
df = df.loc[df["cure_or_Fail"] == target_class].drop(columns=["cure_or_Fail"])
df.shape

(15, 9)

In [None]:

class GAN():
    """Vanilla GAN for tabular records built from small dense Keras models.

    The instance owns three Keras models:

    * ``generator``: maps noise vectors to synthetic records.
    * ``discriminator``: scores records as real (1) or generated (0).
    * ``combined``: generator stacked on a frozen discriminator, used to
      train the generator.
    """

    def __init__(self, gan_args):
        """Build and compile the generator, discriminator and stacked model.

        Args:
            gan_args: Sequence ``[batch_size, lr, noise_dim, data_dim,
                layers_dim]``. ``lr`` is the Adam learning rate and
                ``layers_dim`` the base width of the hidden layers.
        """
        [self.batch_size, lr, self.noise_dim,
         self.data_dim, layers_dim] = gan_args

        self.generator = Generator(self.batch_size).\
            build_model(input_shape=(self.noise_dim,), dim=layers_dim, data_dim=self.data_dim)

        self.discriminator = Discriminator(self.batch_size).\
            build_model(input_shape=(self.data_dim,), dim=layers_dim)

        # Second positional argument of Keras' Adam is beta_1.
        optimizer = Adam(lr, 0.5)

        # Build and compile the discriminator
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # The generator takes noise as input and generates new records
        z = Input(shape=(self.noise_dim,))
        record = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated records as input and determines validity
        validity = self.discriminator(record)

        # The combined model (stacked generator and discriminator)
        # trains the generator to fool the discriminator
        self.combined = Model(z, validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    def get_data_batch(self, train, batch_size, seed=0):
        """Return one batch of rows from ``train`` as a 2-D array.

        Rows are visited in a shuffled order that is a deterministic function
        of ``seed``: consecutive seeds walk through one permutation of the
        rows, and a new permutation is drawn each time the walk wraps around.
        When a batch runs past the end of the permutation it wraps back to its
        start, so ``batch_size`` may exceed ``len(train)``.

        Args:
            train: pandas DataFrame holding the real records.
            batch_size: Number of rows to return.
            seed: Step counter selecting the batch (default 0).

        Returns:
            ``numpy.ndarray`` of shape ``(batch_size, n_columns)``.

        Raises:
            ValueError: If ``train`` has no rows.
        """
        n_rows = len(train)
        if n_rows == 0:
            raise ValueError("Cannot draw a batch from an empty dataset.")

        start_i = (batch_size * seed) % n_rows
        stop_i = start_i + batch_size
        shuffle_seed = (batch_size * seed) // n_rows

        # A private RandomState yields the same permutation as seeding the
        # global NumPy generator would, without resetting global RNG state.
        rng = np.random.RandomState(shuffle_seed)
        order = rng.choice(n_rows, replace=False, size=n_rows)

        # Repeat the permutation as often as needed to cover [start_i, stop_i).
        repeats = max(1, -(-stop_i // n_rows))
        positions = np.tile(order, repeats)[start_i:stop_i]

        # Positional lookup: also correct when the index has duplicate labels.
        x = train.iloc[positions].values
        return np.reshape(x, (batch_size, -1))

    def train(self, data, train_arguments):
        """Run the adversarial training loop.

        Each iteration trains the discriminator on one real and one generated
        batch, then trains the generator through the combined model. Every
        ``sample_interval`` iterations the generator and discriminator weights
        are written under ``model/``.

        Args:
            data: pandas DataFrame with the real records
                (``data_dim`` columns).
            train_arguments: Sequence ``[cache_prefix, epochs,
                sample_interval]``. ``epochs`` counts single-batch training
                iterations; ``cache_prefix`` is prepended to checkpoint file
                names.
        """
        [cache_prefix, epochs, sample_interval] = train_arguments

        # Adversarial ground truths
        valid = np.ones((self.batch_size, 1))
        fake = np.zeros((self.batch_size, 1))

        model_checkpoint_base_name = 'model/' + cache_prefix + '_{}_model_weights_step_{}.h5'
        # Make sure the checkpoint folder exists before the first save.
        os.makedirs(os.path.dirname(model_checkpoint_base_name), exist_ok=True)

        for epoch in range(epochs):
            # ---------------------
            #  Train Discriminator
            # ---------------------
            # Pass the step counter so successive iterations walk through the
            # shuffled data instead of reusing the very same batch.
            batch_data = self.get_data_batch(data, self.batch_size, seed=epoch)
            noise = tf.random.normal((self.batch_size, self.noise_dim))

            # Generate a batch of new records
            gen_data = self.generator.predict(noise)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(batch_data, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_data, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            noise = tf.random.normal((self.batch_size, self.noise_dim))
            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self.combined.train_on_batch(noise, valid)

            # Report progress: d_loss holds [loss, accuracy]
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # At every save interval, checkpoint both networks
            if epoch % sample_interval == 0:
                self.generator.save_weights(model_checkpoint_base_name.format('generator', epoch))
                self.discriminator.save_weights(model_checkpoint_base_name.format('discriminator', epoch))

                # Generation smoke test; the generated records are discarded
                z = tf.random.normal((432, self.noise_dim))
                gen_data = self.generator(z)
                print('generated_data')

    def save(self, path, name):
        """Save the generator weights to ``os.path.join(path, name)``.

        Args:
            path: Existing directory that will hold the weights.
            name: File name (or checkpoint prefix) inside ``path``.

        Raises:
            AssertionError: If ``path`` is not an existing directory.
        """
        assert os.path.isdir(path), \
            "Please provide a valid path. Path must be a directory."
        model_path = os.path.join(path, name)
        self.generator.save_weights(model_path)  # Persist the generator
        return

    def load(self, path, name=None):
        """Load previously saved weights into the existing generator model.

        Args:
            path: Full path (or checkpoint prefix) of the saved weights, or
                the directory that holds them when ``name`` is given.
            name: Optional file name inside ``path``; mirrors the arguments
                of ``save``.

        Returns:
            The generator Keras model with the loaded weights.

        Raises:
            AssertionError: If ``name`` is given and ``path`` is not an
                existing directory.
        """
        if name is not None:
            assert os.path.isdir(path), \
                "Please provide a valid path. Path must be a directory."
            path = os.path.join(path, name)
        # The generator built in __init__ already has the right architecture;
        # load into it rather than replacing it with the weight-loading result.
        self.generator.load_weights(path)
        return self.generator
    
class Generator():
    """Builder for the generator network (noise -> synthetic record)."""

    def __init__(self, batch_size):
        # Fixed batch size baked into the model's input layer.
        self.batch_size = batch_size

    def build_model(self, input_shape, dim, data_dim):
        """Return a Keras model with two ReLU hidden layers and a linear output.

        Args:
            input_shape: Shape of one noise vector, without the batch axis.
            dim: Width of the first hidden layer; the second is ``dim * 2``.
            data_dim: Number of output units (columns of a generated record).
        """
        noise_in = Input(shape=input_shape, batch_size=self.batch_size)
        hidden = Dense(dim, activation='relu')(noise_in)
        hidden = Dense(dim * 2, activation='relu')(hidden)
        record = Dense(data_dim)(hidden)
        return Model(inputs=noise_in, outputs=record)

class Discriminator():
    """Builder for the discriminator network (record -> validity score)."""

    def __init__(self, batch_size):
        # Fixed batch size baked into the model's input layer.
        self.batch_size = batch_size

    def build_model(self, input_shape, dim):
        """Return a Keras model with two ReLU hidden layers and a sigmoid output.

        Args:
            input_shape: Shape of one record, without the batch axis.
            dim: Base width; the hidden layers have ``dim * 4`` and ``dim`` units.
        """
        record_in = Input(shape=input_shape, batch_size=self.batch_size)
        hidden = Dense(dim * 4, activation='relu')(record_in)
        hidden = Dense(dim, activation='relu')(hidden)
        validity = Dense(1, activation='sigmoid')(hidden)
        return Model(inputs=record_in, outputs=validity)

In [None]:
# Column labels of df; reused later to label the generated samples.
data_cols = df.columns

In [None]:
# training configuration
noise_dim = 11  # length of the noise vector fed to the generator
dim = 10 # reconsider the number of units per layer
batch_size = 26 # everything fits in memory

log_step = 100  # passed to train() as the checkpoint interval
epochs = 30000  # number of training iterations
learning_rate = 5e-4
models_dir = 'model'  # NOTE(review): GAN.train hard-codes 'model/' and does not read this

In [None]:
#Define the GAN and training parameters
# Number of columns the GAN has to model (the self-assignment of
# df[data_cols] that used to precede this line was a no-op and was removed).
print(df.shape[1])

# [batch_size, lr, noise_dim, data_dim, layers_dim] as unpacked by GAN.__init__
gan_args = [batch_size, learning_rate, noise_dim, df.shape[1], dim]
# [cache_prefix, epochs, sample_interval] as unpacked by GAN.train
train_args = ['', epochs, log_step]

9


In [None]:
# Create the whole checkpoint tree in one call. exist_ok=True makes the cell
# safe to re-run, where plain `mkdir` fails with "File exists".
os.makedirs("model/gan/saved", exist_ok=True)

mkdir: cannot create directory ‘model’: File exists
mkdir: cannot create directory ‘model/gan’: File exists
mkdir: cannot create directory ‘model/gan/saved’: File exists


In [None]:
# Class of the GAN variant to instantiate.
model = GAN

#Training the GAN model chosen: Vanilla GAN, CGAN, DCGAN, etc.
# Build the networks from gan_args, then run the training loop on df.
synthesizer = model(gan_args)
synthesizer.train(df, train_args)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
25049 [D loss: 0.351198, acc.: 88.46%] [G loss: 2.389475]
25050 [D loss: 0.376256, acc.: 86.54%] [G loss: 1.559804]
25051 [D loss: 0.403380, acc.: 96.15%] [G loss: 1.725591]
25052 [D loss: 0.417023, acc.: 90.38%] [G loss: 1.935994]
25053 [D loss: 0.450861, acc.: 82.69%] [G loss: 1.194587]
25054 [D loss: 0.403040, acc.: 84.62%] [G loss: 1.827568]
25055 [D loss: 0.414404, acc.: 82.69%] [G loss: 2.164593]
25056 [D loss: 0.458595, acc.: 80.77%] [G loss: 1.780565]
25057 [D loss: 0.459997, acc.: 80.77%] [G loss: 1.359942]
25058 [D loss: 0.457690, acc.: 80.77%] [G loss: 1.709636]
25059 [D loss: 0.425168, acc.: 82.69%] [G loss: 1.227289]
25060 [D loss: 0.428699, acc.: 86.54%] [G loss: 1.638329]
25061 [D loss: 0.481865, acc.: 82.69%] [G loss: 1.883433]
25062 [D loss: 0.428257, acc.: 86.54%] [G loss: 2.236468]
25063 [D loss: 0.395357, acc.: 88.46%] [G loss: 2.206558]
25064 [D loss: 0.423701, acc.: 82.69%] [G loss: 1.287036]
25065 [

In [None]:
# Idempotent replacement for `!mkdir model/gan`: no error if it already exists.
os.makedirs("model/gan", exist_ok=True)

mkdir: cannot create directory ‘model/gan’: File exists


In [None]:
# Persist the trained generator weights under model/gan/saved.
synthesizer.save('model/gan/saved', 'generator_patients')

In [None]:
# Print the layer-by-layer architecture of the generator.
synthesizer.generator.summary()

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(26, 11)]                0         
_________________________________________________________________
dense_12 (Dense)             (26, 10)                  120       
_________________________________________________________________
dense_13 (Dense)             (26, 20)                  220       
_________________________________________________________________
dense_14 (Dense)             (26, 9)                   189       
Total params: 529
Trainable params: 529
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Print the layer-by-layer architecture of the discriminator.
synthesizer.discriminator.summary()

Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(26, 9)]                 0         
_________________________________________________________________
dense_15 (Dense)             (26, 40)                  400       
_________________________________________________________________
dense_16 (Dense)             (26, 10)                  410       
_________________________________________________________________
dense_17 (Dense)             (26, 1)                   11        
Total params: 821
Trainable params: 0
Non-trainable params: 821
_________________________________________________________________


In [None]:
# Registry entry: [model name, with_class flag, trained generator model].
models = {'GAN': ['GAN', False, synthesizer.generator]}

In [None]:
# Sampling parameters for the trained generator
seed = 17
test_size = 30  # number of synthetic records to generate
noise_dim = 11

# Seed NumPy so the same noise, and hence the same samples, is reproduced.
np.random.seed(seed)
z = np.random.normal(size=(test_size, noise_dim))

[model_name, with_class, generator_model] = models['GAN']
g_z = generator_model.predict(z)

gen_samples = pd.DataFrame(g_z, columns=data_cols)
# Label the synthetic rows with the source dataset's own column name
# ("cure_or_Fail") so they line up with the real data when concatenated.
gen_samples["cure_or_Fail"] = target_class
gen_samples.to_excel("generated{}.xlsx".format(target_class), index=False)  # save generated data