In [2]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import os
from keras.layers.merge import _Merge, concatenate, dot
from keras.layers.normalization import BatchNormalization
from matplotlib import colors as mcol
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
from keras import backend as K
from matplotlib.colors import LogNorm 
from functools import partial
from keras.models import Model, Sequential, model_from_json
from keras.layers import (
    Input,
    Dense,
    Reshape,
    Flatten,
    Dropout,
    ActivityRegularization,
    Lambda,
    Concatenate,
    Permute,
    Convolution1D,
    MaxPooling1D,
    AveragePooling1D,
    GlobalAveragePooling1D,
)



# Environment setup.
# CUDA_VISIBLE_DEVICES restricts this process to GPU device 0.
# IMAGE_DIR_PATH is the directory prefix for saved artifacts; in this notebook
# it is only referenced by a commented-out save_weights call in train_FAT_GAN.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
IMAGE_DIR_PATH = "gallery/"

In [3]:
# Synthetic "real" dataset: 2000 rows x 4 columns of standard-normal samples.
# No random seed is set, so the values differ on every run.
data = np.random.randn(2000,4)
data.shape

(2000, 4)

In [4]:
def MMD_loss(x, y):
    """
    MMD-style discrepancy between a batch of true samples and generated ones.

    x = true samples
    y = generated (fake) samples

    Each batch is cut into a first and a second half of HALF_BATCH rows. A
    similarity term exp(sigma / (d*d + sigma)) is evaluated element-wise on
    the row-aligned differences d for three pairings: the two halves of x,
    the two halves of y, and x against y. Every term is summed over all
    elements and divided by the number of rows involved. The loss is the
    square of (xx + yy - 2 * xy).

    Relies on the module-level HALF_BATCH and BATCH_SIZE, so both inputs are
    expected to hold exactly BATCH_SIZE rows.
    """

    sigma = 0.1

    def _similarity(a, b, rows):
        # Element-wise kernel on the squared difference, normalised by rows.
        diff = a - b
        return K.sum(K.exp(sigma / (diff * diff + sigma))) / rows

    x_first, x_second = x[:HALF_BATCH, :], x[HALF_BATCH:, :]
    y_first, y_second = y[:HALF_BATCH, :], y[HALF_BATCH:, :]

    within_true = _similarity(x_first, x_second, HALF_BATCH)
    within_fake = _similarity(y_first, y_second, HALF_BATCH)
    across = _similarity(x, y, BATCH_SIZE)

    gap = within_true + within_fake - 2 * across
    return gap * gap

In [9]:
def wasserstein_loss(y_true, y_pred):

    """Wasserstein loss for one batch: the mean of label times critic output.

    Unlike a standard GAN discriminator, which ends in a sigmoid and emits a
    probability in [0, 1], a Wasserstein critic has a linear, unbounded
    output and is trained to push its scores for real and generated samples
    as far apart as possible.
    Encoding the two classes as +1 and -1 (rather than 1 and 0) makes the
    loss a plain product of label and score, averaged over the batch. The
    training loop in this notebook passes +1 for real samples and -1 for
    generated ones.
    Because the scores are unbounded and signed, the returned value can be
    (and often is) negative."""

    signed_scores = y_true * y_pred
    return K.mean(signed_scores)

In [10]:
def gradient_penalty_loss(y_true, y_pred,
                          averaged_samples, gradient_penalty_weight):
    """Gradient penalty: weighted squared deviation of the gradient norm from 1.

    y_true is ignored; it exists only because Keras losses take
    (y_true, y_pred). averaged_samples and gradient_penalty_weight are meant
    to be bound ahead of time (e.g. with functools.partial).

    The gradient of y_pred with respect to averaged_samples is reduced to an
    L2 norm over every axis except the first, and the mean of
    gradient_penalty_weight * (1 - norm)^2 is returned.
    """
    grads = K.gradients(y_pred, averaged_samples)[0]
    squared = K.square(grads)
    # Reduce over all axes but the first, leaving one norm per leading index.
    reduce_axes = np.arange(1, len(squared.shape))
    l2_norm = K.sqrt(K.sum(squared, axis=reduce_axes))
    deviation = 1 - l2_norm
    return K.mean(gradient_penalty_weight * K.square(deviation))


class RandomWeightedAverage(_Merge):
    """Merge layer that blends two tensors with random per-row weights.

    For every row a weight w is drawn uniformly at random and the layer
    returns w * first + (1 - w) * second, i.e. a random point on the segment
    joining each pair of corresponding input rows. The weight tensor has
    shape (BATCH_SIZE, 1), so inputs must be fed in batches of BATCH_SIZE.
    """
    def _merge_function(self, inputs):
        first, second = inputs[0], inputs[1]
        mix = K.random_uniform((BATCH_SIZE, 1))
        return (mix * first) + ((1 - mix) * second)


In [11]:
# def add_feature(x):
    
#     px = x[:, 0:1]
#     py = x[:, 1:2]
#     pz = x[:, 2:3]
#     energy = K.sqrt( (px * px) + (py * py) + (pz * pz) )

#     return energy

In [12]:
def make_generator():
    """Build the generator network.

    A 100-dimensional input passes through five Dense(512) layers, each
    followed by a LeakyReLU, and a final linear Dense(4) layer. The third
    activation uses alpha=0.2; the others use the LeakyReLU default.

    Returns:
        An uncompiled Keras Model mapping (None, 100) to (None, 4).
    """
    latent = Input(shape=(100,))

    # One entry per hidden stage; None means "use LeakyReLU's default alpha".
    stage_alphas = (None, None, 0.2, None, None)

    net = latent
    for slope in stage_alphas:
        net = Dense(512)(net)
        activation = LeakyReLU() if slope is None else LeakyReLU(alpha=slope)
        net = activation(net)

    # The 4 output units are the data columns without FAT (extended features).
    samples = Dense(4)(net)
#     energy = Lambda(add_feature)(output)
#     outputmerge = concatenate([output, energy])
    return Model(inputs=latent, outputs=[samples])

In [13]:
# Build a generator instance and print its layer-by-layer summary.
generator = make_generator()
generator.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               51712     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 512)               262656    
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 512)               0   

In [14]:
def make_discriminator():
    """Build the discriminator (critic) network.

    A 4-dimensional input, matching the generator output / data width, passes
    through five stages of Dense(512) -> LeakyReLU -> Dropout(0.1) and a
    final Dense(1) layer with no activation. The second activation uses
    alpha=0.2; the others use the LeakyReLU default.

    Returns:
        An uncompiled Keras Model mapping (None, 4) to (None, 1).
    """
    sample_in = Input(shape=(4,))  # generator output vector / data row

    # One entry per hidden stage; None means "use LeakyReLU's default alpha".
    stage_alphas = (None, 0.2, None, None, None)

    net = sample_in
    for slope in stage_alphas:
        net = Dense(512)(net)
        activation = LeakyReLU() if slope is None else LeakyReLU(alpha=slope)
        net = activation(net)
        net = Dropout(rate=0.1)(net)

    score = Dense(1)(net)
    return Model(inputs=[sample_in], outputs=score)

In [15]:
# Build a discriminator instance and print its layer-by-layer summary.
discriminator = make_discriminator()
discriminator.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 4)                 0         
_________________________________________________________________
dense_7 (Dense)              (None, 512)               2560      
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 512)               262656    
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0   

In [16]:
# Batch sizes. MMD_loss splits every batch into two halves of HALF_BATCH rows,
# so the full batch (BATCH_SIZE) is exactly twice HALF_BATCH.
HALF_BATCH = 200
BATCH_SIZE = HALF_BATCH * 2

# TRAINING_RATIO is the number of discriminator updates per generator update.
# GRADIENT_PENALTY_WEIGHT scales the gradient-penalty term of the
# discriminator loss.
TRAINING_RATIO = 5
GRADIENT_PENALTY_WEIGHT = 10


def make_MMD():
    """Build an identity model used to expose samples to MMD_loss.

    The model's single 4-dimensional input tensor is also its output, so
    wrapping the generator output in it yields a named model output that a
    loss can be attached to without transforming the values.

    Returns:
        An uncompiled Keras Model mapping (None, 4) to (None, 4) unchanged.
    """
    visible = Input(shape=(4,))
    # Use the plural `outputs=` keyword, as the other model builders in this
    # notebook do; the singular `output=` is a legacy spelling.
    MMD = Model(inputs=visible, outputs=visible)

    return MMD

In [17]:
# Print the summary of the module-level generator again (same model as above).
generator.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               51712     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 512)               262656    
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 512)               0   

In [18]:
# Training configuration: number of passes over the dataset made by
# train_FAT_GAN.
EPOCHS = 10
def train_FAT_GAN(data, epochs=None):
    """Train a fresh generator/discriminator pair on ``data``.

    Two Keras models are compiled over the same underlying networks:

    * ``generator_model`` maps noise to ``[discriminator(G(z)), G(z)]``. It is
      compiled while the discriminator's trainable flags are off, with
      ``wasserstein_loss`` on the first output and ``MMD_loss`` on the second.
    * ``discriminator_model`` maps ``[real, noise]`` to the discriminator
      output for real samples, generated samples and random interpolations
      between them. It is compiled while the generator's trainable flags are
      off, with ``wasserstein_loss`` on the first two outputs and the
      gradient penalty on the third.

    Every training step performs ``TRAINING_RATIO`` discriminator updates on
    consecutive batches of ``BATCH_SIZE`` rows, followed by one generator
    update. Rows beyond the last full ``BATCH_SIZE * TRAINING_RATIO`` chunk
    are skipped in that epoch. Every 100th epoch (starting with epoch 0) the
    generated and real per-column histograms are plotted.

    Args:
        data: 2-D NumPy array of real samples, one row per sample. Its column
            count must match the discriminator input (4). The array is not
            modified; each epoch works on a shuffled copy.
        epochs: Number of epochs to run. Defaults to the module-level
            ``EPOCHS`` when omitted or None.

    Returns:
        None. Progress is printed and histograms are shown as side effects.
    """
    if epochs is None:
        epochs = EPOCHS

    generator = make_generator()
    discriminator = make_discriminator()
    MMD = make_MMD()

    # ---- Generator training graph ----------------------------------------
    # Switch the discriminator off before compiling so that only the
    # generator's weights are updated through generator_model.
    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False

    generator_input = Input(shape=(100,))
    generator_layers = generator(generator_input)
    discriminator_layers_for_generator = discriminator(generator_layers)
    MMD_Layers_for_generator = MMD(generator_layers)

    generator_model = Model(
        inputs=generator_input,
        outputs=[discriminator_layers_for_generator, MMD_Layers_for_generator],
    )
    # We use the Adam parameters from Gulrajani et al.
    generator_model.compile(
        optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
        loss=[wasserstein_loss, MMD_loss],
    )
    generator_model.summary()

    # ---- Discriminator training graph ------------------------------------
    # Flip the flags: discriminator on, generator off, then compile the
    # second model.
    for layer in discriminator.layers:
        layer.trainable = True
    discriminator.trainable = True

    for layer in generator.layers:
        layer.trainable = False
    generator.trainable = False

    real_samples = Input(shape=data.shape[1:])
    generator_input_for_discriminator = Input(shape=(100,))

    generated_samples_for_discriminator = generator(generator_input_for_discriminator)
    discriminator_output_from_generator = discriminator(generated_samples_for_discriminator)
    discriminator_output_from_real_samples = discriminator(real_samples)

    # We also need to generate weighted-averages of real and generated samples,
    # to use for the gradient norm penalty.
    averaged_samples = RandomWeightedAverage()(
        [real_samples, generated_samples_for_discriminator]
    )

    # We then run these samples through the discriminator as well.
    # Note that we never really use the discriminator output for these samples,
    # we're only running them to get the gradient norm for the gradient
    # penalty loss.
    averaged_samples_out = discriminator(averaged_samples)

    # The gradient penalty loss function requires the input averaged
    # samples to get gradients. However, Keras loss functions can only have
    # two arguments, y_true and y_pred. We get around this by making
    # a partial() of the function with the averaged samples here.
    partial_gp_loss = partial(
        gradient_penalty_loss,
        averaged_samples=averaged_samples,
        gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT,
    )
    # Functions need names or Keras will throw an error
    partial_gp_loss.__name__ = "gradient_penalty"

    # The model has three outputs, each of size BATCH_SIZE: the discriminator
    # scores for the real samples, for the generated samples, and for the
    # averaged samples.
    discriminator_model = Model(
        inputs=[real_samples, generator_input_for_discriminator],
        outputs=[
            discriminator_output_from_real_samples,
            discriminator_output_from_generator,
            averaged_samples_out,
        ],
    )
    # We use the Adam parameters from Gulrajani et al. We use the Wasserstein
    # loss for both the real and generated samples, and the gradient penalty
    # loss for the averaged samples.
    discriminator_model.compile(
        optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
        loss=[wasserstein_loss, wasserstein_loss, partial_gp_loss],
    )
    discriminator_model.summary()

    # We make three label vectors for training. positive_y is the label
    # vector for real samples, with value 1. negative_y is the label vector
    # for generated samples, with value -1. The dummy_y vector is passed to the
    # gradient_penalty loss function and is not used.
    positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
    negative_y = -positive_y
    dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32)

    # Rows consumed by one training step: TRAINING_RATIO discriminator batches.
    minibatches_size = BATCH_SIZE * TRAINING_RATIO
    steps_per_epoch = data.shape[0] // minibatches_size

    for epoch in range(epochs):
        # Shuffle a copy along the first axis so the caller's array keeps its
        # original row order.
        shuffled = np.random.permutation(data)
        discriminator_loss = []  # collected per update; not reported below
        generator_loss = []

        for i in range(steps_per_epoch):
            discriminator_minibatches = shuffled[
                i * minibatches_size: (i + 1) * minibatches_size
            ]
            noise = np.random.normal(0, 1, [minibatches_size, 100])

            for j in range(TRAINING_RATIO):
                image_batch = discriminator_minibatches[
                    j * BATCH_SIZE: (j + 1) * BATCH_SIZE
                ]
                noise_batch = noise[j * BATCH_SIZE: (j + 1) * BATCH_SIZE]

                # Real samples are paired with +1 and generated ones with -1.
                discriminator_loss.append(
                    discriminator_model.train_on_batch(
                        [image_batch, noise_batch], [positive_y, negative_y, dummy_y]
                    )
                )

            # The MMD target is the last real batch shown to the
            # discriminator in this step, selected explicitly rather than
            # through the inner loop's leftover variable.
            mmd_target = discriminator_minibatches[-BATCH_SIZE:]
            noise = np.random.normal(0, 1, [BATCH_SIZE, 100])
            generator_loss.append(
                generator_model.train_on_batch(noise, [positive_y, mmd_target])
            )
        print(epoch, generator_loss)

        # Every 100 epochs, compare generated and real per-column histograms.
        if epoch % 100 == 0:
            print(epoch)
            SAMPLE_SIZE = data.shape[0]
            noise = np.random.normal(0, 1, [SAMPLE_SIZE, 100])
            results = generator.predict(noise)
            for i in range(4):
                plt.hist(results[:, i], histtype='step', bins=100)
                plt.hist(data[:, i], histtype='step', bins=100)
                plt.legend(['GAN', 'True'])
                plt.show()
            # generator.save_weights( IMAGE_DIR_PATH + "generator" + str(epoch // 100).zfill(5) + ".h5")

In [20]:
# Run training on the synthetic dataset defined at the top of the notebook.
train_FAT_GAN(data)