In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
from PIL import Image, ImageDraw
import imageio

In [11]:
# Create the directory that receives the generated sample grids.
# exist_ok=True makes the cell safely re-runnable and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs('gan_images', exist_ok=True)

# Data Preprocessing

### **Normalization**: The pixel values are scaled to the range [-1, 1], which matches the range of the generator's tanh output layer. Normalization helps in speeding up the convergence during training.

### **Reshape**: The images are reshaped to include a channel dimension (for grayscale images), making it compatible with the neural network input.

In [12]:
# Load MNIST and keep only the training images: the labels and the test
# split are not used anywhere in the GAN training below.
X_train = tf.keras.datasets.mnist.load_data()[0][0]

# Centre and scale the pixels: a value of 0 maps to -1 and 255 maps to 1
# (assumes 8-bit pixel intensities -- TODO confirm against the dataset docs).
X_train = (X_train.astype(np.float32) - 127.5) / 127.5

# Append a trailing channel axis so each image is (height, width, 1).
X_train = X_train[..., np.newaxis]

# Build the Generator Model

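### **Dense Layers:** The generator takes a random noise vector (of size 50 in this notebook) as input and generates a 28x28 image. The first layer transforms the noise into a larger representation, which is then widened through further dense layers (256 → 512 → 1024 units) before being projected to 28 × 28 pixels.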

### **Activation Functions:** LeakyReLU (with a negative slope of 0.2), followed by batch normalization, is used for the hidden layers, and tanh is used for the output layer so that pixel values lie in the [-1, 1] range, matching the normalized training images.

In [13]:
def build_generator(latent_dim=50):
    """Assemble the fully connected generator network.

    The network maps a noise vector of length ``latent_dim`` through three
    hidden stages (256, 512 and 1024 units), each made of a Dense layer,
    a LeakyReLU activation and batch normalization, and finally projects
    to 28 * 28 values that are reshaped into a single-channel image.

    Args:
        latent_dim: Length of the input noise vector.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model whose output has shape
        ``(batch, 28, 28, 1)`` with values in [-1, 1] (tanh output).
    """
    hidden_widths = (256, 512, 1024)

    net = tf.keras.Sequential()
    for depth, width in enumerate(hidden_widths):
        # Only the very first layer declares the input size.
        if depth == 0:
            net.add(layers.Dense(width, input_dim=latent_dim))
        else:
            net.add(layers.Dense(width))
        net.add(layers.LeakyReLU(alpha=0.2))
        net.add(layers.BatchNormalization(momentum=0.8))

    # One tanh unit per pixel, then fold the flat vector back into an image.
    net.add(layers.Dense(28 * 28 * 1, activation='tanh'))
    net.add(layers.Reshape((28, 28, 1)))
    return net

# Instantiate a generator for a 50-dimensional latent space and print
# its layer-by-layer summary.
generator = build_generator(50)
generator.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_28 (Dense)            (None, 256)               13056     
                                                                 
 leaky_re_lu_20 (LeakyReLU)  (None, 256)               0         
                                                                 
 batch_normalization_12 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dense_29 (Dense)            (None, 512)               131584    
                                                                 
 leaky_re_lu_21 (LeakyReLU)  (None, 512)               0         
                                                                 
 batch_normalization_13 (Bat  (None, 512)              2048      
 chNormalization)                                     

# Build the Discriminator Model

### **Flatten Layer:** The discriminator processes images by flattening them into a 1D vector before feeding them into fully connected layers.

### **Output Layer:** The output is a single value between 0 and 1, representing the probability that the input image is real.

In [14]:
# Discriminator model
def build_discriminator():
    """Assemble the fully connected discriminator network.

    A ``(28, 28, 1)`` image is flattened and passed through two hidden
    Dense layers (512 and 256 units, each followed by LeakyReLU) and a
    single sigmoid output unit.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model producing one value in
        (0, 1) per input image.
    """
    return tf.keras.Sequential([
        layers.Flatten(input_shape=(28, 28, 1)),
        layers.Dense(512),
        layers.LeakyReLU(alpha=0.2),
        layers.Dense(256),
        layers.LeakyReLU(alpha=0.2),
        layers.Dense(1, activation='sigmoid'),
    ])

# Instantiate the discriminator and print its layer-by-layer summary.
discriminator = build_discriminator()
discriminator.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 784)               0         
                                                                 
 dense_32 (Dense)            (None, 512)               401920    
                                                                 
 leaky_re_lu_23 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_33 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_24 (LeakyReLU)  (None, 256)               0         
                                                                 
 dense_34 (Dense)            (None, 1)                 257       
                                                                 
Total params: 533,505
Trainable params: 533,505
Non-tr

# Compile the Models & Train the Model

### **Loss Function:** Binary cross-entropy is used because it is a binary classification problem (real vs. fake).

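### **Optimizer:** The optimizer is selectable (`'adam'`, `'rmsprop'` or `'sgd'`), always with a learning rate of 0.0002. Adam is the default because of its efficiency in training deep learning models; the run below uses RMSprop.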

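### **Epochs:** The GAN is trained for a set number of epochs. Note that in this notebook each "epoch" is a single training iteration rather than a full pass over the dataset: one randomly sampled batch of real images and one batch of generated images are used to update the discriminator, followed by one update of the generator.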

### **Noise Generation:** Random noise is generated for the generator to create fake images.

In [15]:
def save_imgs(epoch, latent_dim, generator, image_grid_rows=4, image_grid_cols=4):
    """Render a grid of generator samples and save it as a PNG.

    Fresh noise is drawn for every call, so successive grids show different
    samples. The image is written to ``gan_images/mnist_{epoch}.png``.

    Args:
        epoch: Value embedded in the output file name.
        latent_dim: Length of each noise vector fed to the generator.
        generator: Model exposing ``predict``; its output is indexed as
            ``[sample, row, col, 0]``.
        image_grid_rows: Number of rows in the sample grid.
        image_grid_cols: Number of columns in the sample grid.
    """
    n_samples = image_grid_rows * image_grid_cols
    noise = np.random.normal(0, 1, (n_samples, latent_dim))
    # verbose=0 keeps the per-call Keras progress bar out of the notebook output.
    gen_imgs = generator.predict(noise, verbose=0)

    # Rescale images 0 - 1
    gen_imgs = 0.5 * gen_imgs + 0.5

    # squeeze=False guarantees a 2-D axes array, so grids with a single row
    # or column work as well.
    fig, axs = plt.subplots(image_grid_rows, image_grid_cols, figsize=(4, 4), squeeze=False)
    # axs.flat walks the grid row by row, matching the sample order.
    for cnt, ax in enumerate(axs.flat):
        ax.imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
        ax.axis('off')

    # Make sure the target directory exists even if the setup cell was skipped.
    os.makedirs('gan_images', exist_ok=True)
    fig.savefig(f"gan_images/mnist_{epoch}.png")
    # Close this specific figure (not just the "current" one) to free memory.
    plt.close(fig)


In [16]:
def build_gan(generator, discriminator, latent_dim=50, optimizer='adam'):
    """Compile the discriminator and build the combined generator->discriminator model.

    Side effects: ``discriminator`` is compiled (binary cross-entropy with an
    accuracy metric) and then has ``trainable`` set to ``False`` so that the
    combined model, which is compiled afterwards, only updates the generator.

    Args:
        generator: Model mapping ``(batch, latent_dim)`` noise to images.
        discriminator: Model mapping images to a real/fake probability.
        latent_dim: Length of the noise vector fed to the combined model.
        optimizer: ``'rmsprop'``, ``'sgd'`` or ``'adam'``. Any other value
            falls back to Adam. All use a learning rate of 0.0002.

    Returns:
        The compiled combined model (noise in, discriminator verdict out).
    """
    def _make_optimizer():
        # Return a fresh optimizer instance of the requested kind.
        if optimizer == 'rmsprop':
            return tf.keras.optimizers.legacy.RMSprop(learning_rate=0.0002)
        if optimizer == 'sgd':
            return tf.keras.optimizers.legacy.SGD(learning_rate=0.0002)
        return tf.keras.optimizers.legacy.Adam(learning_rate=0.0002)

    # Each model gets its own optimizer instance: sharing one would mix the
    # step counter and per-variable state of two different training objectives.
    discriminator.compile(optimizer=_make_optimizer(), loss='binary_crossentropy', metrics=['accuracy'])
    # Set after compiling the discriminator and before compiling the combined
    # model, so that only the combined model treats these weights as frozen.
    discriminator.trainable = False

    gan_input = layers.Input(shape=(latent_dim,))
    generated_img = generator(gan_input)
    gan_output = discriminator(generated_img)

    gan = tf.keras.Model(gan_input, gan_output)
    gan.compile(optimizer=_make_optimizer(), loss='binary_crossentropy')

    return gan

# Optimizer used when compiling the models: 'rmsprop', 'sgd' or 'adam'
# (any other value falls back to Adam inside build_gan).
optimizer_choice = 'rmsprop'

# Size of the noise vector shared by the generator and the combined model.
latent_dim = 50

# Build fresh models and wire them together.
generator = build_generator(latent_dim=latent_dim)
discriminator = build_discriminator()
gan = build_gan(
    generator=generator,
    discriminator=discriminator,
    latent_dim=latent_dim,
    optimizer=optimizer_choice,
)

# Training function
def train(epochs, batch_size=128, save_interval=1000, latent_dim=50):
    """Alternate discriminator and generator updates.

    Relies on the module-level ``generator``, ``discriminator``, ``gan`` and
    ``X_train`` objects. Each "epoch" is a single iteration: one random batch
    of real images and one batch of generated images update the discriminator,
    then one batch of noise updates the generator through the combined model.

    Args:
        epochs: Number of training iterations to run.
        batch_size: Number of real (and of generated) images per iteration.
        save_interval: Log losses and save a sample grid every this many
            iterations. A falsy value (0 or None) disables both.
        latent_dim: Length of the noise vectors fed to the generator.
    """
    # Target labels: 1 for real images, 0 for generated ones.
    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        # Sample a random batch of real images (indices drawn with replacement).
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        real_imgs = X_train[idx]

        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        # verbose=0: otherwise Keras prints a progress bar on every iteration.
        gen_imgs = generator.predict(noise, verbose=0)

        # Discriminator step: real and generated batches are trained separately
        # and their [loss, accuracy] results averaged for reporting.
        d_loss_real = discriminator.train_on_batch(real_imgs, real)
        d_loss_fake = discriminator.train_on_batch(gen_imgs, fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Generator step: train the combined model to make the discriminator
        # label generated images as real.
        g_loss = gan.train_on_batch(noise, real)

        # Guard against save_interval == 0, which would raise ZeroDivisionError.
        if save_interval and epoch % save_interval == 0:
            print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}%] [G loss: {g_loss}]")
            save_imgs(epoch, latent_dim, generator)
            
# Run 10,000 training iterations (one batch each) with batches of 64 images,
# logging losses and saving a sample grid every 1,000 iterations.
train(
    epochs=10000,
    batch_size=64,
    save_interval=1000,
    latent_dim=50,
)

0 [D loss: 0.7899618744850159, acc.: 17.96875%] [G loss: 0.6991278529167175]












1000 [D loss: 0.539291262626648, acc.: 75.0%] [G loss: 1.530820608139038]












2000 [D loss: 0.6639774143695831, acc.: 62.5%] [G loss: 1.2190728187561035]














3000 [D loss: 0.6552568674087524, acc.: 60.9375%] [G loss: 1.1305855512619019]












4000 [D loss: 0.6791459619998932, acc.: 60.9375%] [G loss: 1.0426626205444336]












5000 [D loss: 0.686917245388031, acc.: 53.125%] [G loss: 1.0290558338165283]














6000 [D loss: 0.6838889122009277, acc.: 54.6875%] [G loss: 1.0130226612091064]












7000 [D loss: 0.6776703000068665, acc.: 53.90625%] [G loss: 1.0519092082977295]












8000 [D loss: 0.6584527790546417, acc.: 57.8125%] [G loss: 0.9860671162605286]














9000 [D loss: 0.6707321405410767, acc.: 54.6875%] [G loss: 1.0305590629577637]














# Image Generation and Visualization

In [19]:
def create_gif(image_dir='gan_images', output_path='gan_training.gif',
               epochs=10000, save_interval=1000, fps=1):
    """Stitch the saved sample grids into an animated GIF.

    Looks for ``{image_dir}/mnist_{epoch}.png`` for every ``epoch`` in
    ``range(0, epochs, save_interval)``. This matches the iterations at which
    the training loop saves snapshots: it counts from 0 to ``epochs - 1``, so
    no snapshot for ``epochs`` itself is ever written. Missing files are
    reported and skipped.

    Args:
        image_dir: Directory containing the saved sample grids.
        output_path: Path of the GIF to write.
        epochs: Number of training iterations that were run.
        save_interval: Spacing between saved snapshots; must be non-zero.
        fps: Frames per second, passed through to ``imageio.mimsave``.
    """
    frames = []
    for epoch in range(0, epochs, save_interval):
        filename = f"{image_dir}/mnist_{epoch}.png"
        if os.path.exists(filename):
            frames.append(imageio.imread(filename))
        else:
            print(f"Warning: {filename} does not exist.")

    if frames:
        imageio.mimsave(output_path, frames, fps=fps)
    else:
        print("No images to create GIF.")


# Impact of Batch Size on GAN Training:

### Small Batch Size (e.g., 16, 32):

Faster Updates: Small batch sizes lead to more frequent updates, which can help the model learn quickly at the start of training.

Noisier Gradients: The smaller the batch size, the noisier the gradients during training, which may lead to more variability in performance between epochs.

Potential Instability: With GANs, small batch sizes can sometimes lead to instability, as both the generator and discriminator struggle to adapt to each other's changing behavior. This can result in unstable images or mode collapse.

### Medium Batch Size (e.g., 64, 128):

Balance Between Stability and Update Frequency: A medium batch size is often a good trade-off. It gives the model enough data to calculate a more accurate gradient, while still updating reasonably frequently.

Typical for GANs: Many GAN implementations use medium batch sizes, as they provide a stable learning process without causing large memory overhead.

### Large Batch Size (e.g., 256, 512):

Slower Updates: Larger batch sizes result in fewer updates per epoch, which can make the learning process slower.

More Stable Training: Larger batches provide more accurate gradient estimates, which can stabilize training, but at the cost of slower convergence. It also requires more memory and may reduce the diversity of updates in the early stages.

Less Noise: Larger batches reduce the variability in gradient estimates, resulting in smoother learning curves but potentially slower exploration of the parameter space.