Creating a GAN based on the example from this blog post: https://blog.paperspace.com/implementing-gans-in-tensorflow/

For experimental purposes, only a single column of the data (Heart Rate) is used.

In [6]:
# Import libraries
import tensorflow as tf
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler

In [5]:
# Read only the 'Heart Rate' column, limited to the first 10,000 rows of the CSV
heart_rate_frame = pd.read_csv('final.csv', usecols=['Heart Rate'], nrows=10000)

# Scale the values with MinMaxScaler (default feature range is [0, 1]).
# The scaler is fed a plain NumPy array of shape (rows, 1), and the fitted
# `scaler` is kept so generated values can be mapped back with inverse_transform.
scaler = MinMaxScaler()
data = scaler.fit_transform(heart_rate_frame.to_numpy())

# Batch size: how many training samples are used in a single iteration
batch_size = 64

# Function to randomly select a batch of samples from the dataset for training.
def sample_data(n= batch_size):
    """
    Draw a random mini-batch of rows from the module-level `data` array.

    Inputs n: how many rows to draw. The default is the value `batch_size` had
    when this function was defined (Python binds default arguments at definition time).

    Row indices come from np.random.randint over [0, data.shape[0]), so rows are
    picked independently and the same row may appear more than once in a batch.

    Returns: the selected rows of `data`, one per drawn index.
    """
    row_count = data.shape[0]
    picked_rows = np.random.randint(low=0, high=row_count, size=n)
    return data[picked_rows]

In [7]:
# Generator Network
def generator(Z, hsize = [16, 16], reuse = False):
    """ 
    Inputs: Z - placeholder for random input samples, hsize - List defining the number of units in hidden layers
    reuse - Boolean to reuse the same layers

    Layers: h1 - first hidden layer with leaky ReLU activation, h2- second hidden layer with leaky ReLU activation
    out - output layer generating a 2D vector

    This function creates a fully connected neural network with two hidden layers and outputs a 2D vector, 
    matching the dimensions of the real dataset. The goal is for the generator to learn the distribution of the real data.
    """
    with tf.compat.v1.variable_scope("GAN/Generator",reuse=reuse):
        h1 = tf.keras.layers.Dense(hsize[0], activation=tf.nn.leaky_relu)(Z)
        h2 = tf.keras.layers.Dense(hsize[1], activation=tf.nn.leaky_relu)(h1)
        out = tf.keras.layers.Dense(2)(h2)

    return out

In [8]:
# Discrimnator Network
def discriminator(X, hsize = [16, 16], reuse = False):
    """ 
    Inputs: X - placeholder for input samples (real or generated), hsize - List defining the number of units in hidden layers
    reuse - Boolean to reuse the same layers

    Layers: h1 - first hidden layer with leaky ReLU activation, h2- second hidden layer with leaky ReLU activation,
    h3: Third hidden layer, fixed to 2 units for 2D visualization, out - Output layer generating a logit prediction.

    The discriminator evaluates whether the input samples are real or generated. The output consists of 
    logit prediction: indicates the probability of the input being real and h3 Output: Feature transformation learned by the discriminator, 
    useful for visualization
    """
    with tf.compat.v1.variable_scope("GAN/Discriminator",reuse=reuse):
        h1 = tf.keras.layers.Dense(hsize[0], activation=tf.nn.leaky_relu)(X)
        h2 = tf.keras.layers.Dense(hsize[1], activation=tf.nn.leaky_relu)(h1)
        h3 = tf.keras.layers.Dense(2)(h2)
        out = tf.keras.layers.Dense(1)(h3)

    return out, h3

In [10]:
# Paceholders X and Z for real samples and random noise
tf.compat.v1.disable_eager_execution() 

num_features = data.shape[1] # Determines number of features in the data
X = tf.compat.v1.placeholder(tf.float32, [None, num_features]) # X: placeholder for real data
Z = tf.compat.v1.placeholder(tf.float32, [None, 100]) # Z: placeholder for random noise

In [11]:
# Graph for generating samples from Generator Network and feeding real and generated data to Discriminator
"""
G_sample : Output from the Generator network, generated from random noise (Z)
r_logits : Logit predictions for real samples X
r_rep : Feature representation from the Discriminator for real samples.
f_logits : Logit predictions for generated samples G_sample
g_rep : Feature representation from the Discriminator for generated samples

This set up creates a computational graph where:

** The Generator creates fake samples from random noise.
** The Discriminator evaluates both real and fake samples.
** The Discriminator's parameters are reused to ensure consistent training.
"""

# Generate samples from the Generator network
G_sample = generator(Z)

# Get logits and feature representation for real samples from the Discriminator network
r_logits, r_rep = discriminator(X)

# Get logits and feature representation for generated samples, reusing the Discriminator network
f_logits, g_rep = discriminator(G_sample, reuse = True)

In [12]:
# Loss functions for the Discriminator and the Generator
#
# Discriminator loss:
# - sigmoid cross-entropy of the real-sample logits (r_logits) against labels of ones ("real"),
# - plus sigmoid cross-entropy of the generated-sample logits (f_logits) against labels of zeros ("fake"),
# - summed element-wise and averaged. The element-wise sum means the real and generated
#   batches must have compatible (in practice equal) sizes.
#
# Generator loss:
# - sigmoid cross-entropy of the generated-sample logits (f_logits) against labels of ones,
#   i.e. it is small when the Discriminator is fooled into scoring generated samples as real.

# Discriminator Loss
real_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.ones_like(r_logits), logits = r_logits)
fake_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.zeros_like(f_logits), logits = f_logits)
disc_loss = tf.reduce_mean(real_xent + fake_xent)

# Generator Loss
fooling_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.ones_like(f_logits), logits = f_logits)
gen_loss = tf.reduce_mean(fooling_xent)

In [13]:
# Optimisers for the two networks, using RMSPropOptimizer
#
# Each network gets its own optimizer and its own train op. Passing var_list restricts
# every train op to the variables of one network, so a Generator step never changes
# Discriminator weights and vice versa. Alternating the two steps trains the GAN adversarially.

# Collect the variables of each network; `scope` filters the collection by variable-name prefix
global_vars_key = tf.compat.v1.GraphKeys.GLOBAL_VARIABLES
gen_vars = tf.compat.v1.get_collection(global_vars_key, scope = "GAN/Generator")
disc_vars = tf.compat.v1.get_collection(global_vars_key, scope = "GAN/Discriminator")

# G Train step
gen_optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate = 0.001)
gen_step = gen_optimizer.minimize(gen_loss, var_list = gen_vars)

# D Train step
disc_optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate = 0.001)
disc_step = disc_optimizer.minimize(disc_loss, var_list = disc_vars)



Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [15]:
# Alternating training: one Discriminator step, then one Generator step, per iteration.
# The session is left open on purpose so that it stays available after the loop.
sess = tf.compat.v1.Session()
sess.run(tf.compat.v1.global_variables_initializer())

for i in range(100001):
    # Real samples and noise for this iteration (the noise width must match placeholder Z)
    X_batch = sample_data(n=batch_size)
    Z_batch = np.random.uniform(-1, 1, size=[batch_size, 100])

    # Discriminator step: needs both the real batch and the noise batch
    disc_feed = {X: X_batch, Z: Z_batch}
    _, dloss = sess.run([disc_step, disc_loss], feed_dict=disc_feed)

    # Generator step: the same noise batch is fed again, no real data required
    gen_feed = {Z: Z_batch}
    _, gloss = sess.run([gen_step, gen_loss], feed_dict=gen_feed)

    # Report the losses only on every 1000th iteration
    if i % 1000 != 0:
        continue
    print("Iterations: %d\t Discriminator loss: %.4f\t Generator loss: %.4f" % (i, dloss, gloss))


Iterations: 0	 Discriminator loss: 1.3720	 Generator loss: 0.6845
Iterations: 1000	 Discriminator loss: 0.0000	 Generator loss: 16.6114
Iterations: 2000	 Discriminator loss: 0.0000	 Generator loss: 17.9515
Iterations: 3000	 Discriminator loss: 0.0000	 Generator loss: 18.5246
Iterations: 4000	 Discriminator loss: 0.0000	 Generator loss: 18.8943
Iterations: 5000	 Discriminator loss: 0.0000	 Generator loss: 19.1676
Iterations: 6000	 Discriminator loss: 0.0000	 Generator loss: 19.3847
Iterations: 7000	 Discriminator loss: 0.0000	 Generator loss: 19.5646
Iterations: 8000	 Discriminator loss: 0.0000	 Generator loss: 19.7183
Iterations: 9000	 Discriminator loss: 0.0000	 Generator loss: 19.8523
Iterations: 10000	 Discriminator loss: 0.0000	 Generator loss: 19.9711
Iterations: 11000	 Discriminator loss: 0.0000	 Generator loss: 20.0780
Iterations: 12000	 Discriminator loss: 0.0000	 Generator loss: 20.1751
Iterations: 13000	 Discriminator loss: 0.0000	 Generator loss: 20.2639
Iterations: 14000	 D