<a href="https://colab.research.google.com/github/ACatapang/cmsc191-final-project/blob/main/GAN_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Library Installation
We install the libraries needed to build and train the GAN.

In [None]:
!pip install tensorflow numpy matplotlib



# Library Import, Dataset, and Generator and Discriminator Definition
We import the needed libraries, load the dataset (in this case, the preprocessed mango images stored on a shared Google Drive), and then implement the generator and discriminator components.

## Importing Libraries

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import keras
import tensorflow_datasets as tfds
import tensorflow as tf
import time
import random
import cv2
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.preprocessing.image import img_to_array

## Importing dataset from Google Drive

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab file system; the dataset path used when
# loading the images lives under this mount point.
drive_mount_point = "/content/drive"
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# List the contents of the shared dataset folder on the mounted Drive.
!ls /content/drive/Shareddrives/"CMSC 191"/"CMSC 191 GenAI Datasets"

'Mango Preprocessed'


In [None]:
# Build a batched image pipeline from the shared dataset folder.
# Class labels are inferred from the sub-folder names and one-hot encoded;
# every image is resized to 30x40 while loading.
dataset_dir = "/content/drive/Shareddrives/CMSC 191/CMSC 191 GenAI Datasets"
loader_options = dict(
    labels='inferred',
    label_mode='categorical',
    batch_size=64,
    image_size=(30, 40),
)
train_ds = keras.utils.image_dataset_from_directory(directory=dataset_dir, **loader_options)

Found 1620 files belonging to 1 classes.


In [None]:
# Resize the images in train_ds to 30x40 (the size the models in this notebook use).

def resize_images(image, label, target_size=(30, 40)):
  """Resize images to ``target_size`` and pass the labels through unchanged.

  Args:
    image: Image tensor as yielded by ``train_ds``.
    label: Label tensor paired with ``image``; returned as-is.
    target_size: Two-element size handed to ``tf.image.resize``.
      Defaults to ``(30, 40)``, the value that was previously hard-coded.

  Returns:
    Tuple ``(resized_image, label)``.
  """
  image = tf.image.resize(image, list(target_size))
  return image, label

# train_ds is already loaded with image_size=(30, 40), so with the default
# target size this map does not change the spatial dimensions.
resized_train_ds = train_ds.map(resize_images)

In [None]:
# Pull the whole resized dataset into memory, batch by batch, and stitch the
# batches together into single NumPy arrays.
image_batches, label_batches = [], []
for batch_images, batch_labels in resized_train_ds:
    image_batches.append(batch_images.numpy())
    label_batches.append(batch_labels.numpy())

x_train = np.concatenate(image_batches, axis=0)
y_train = np.concatenate(label_batches, axis=0)

# Pixel scaling: first to [0, 1], then to [-1, 1] (the range of the
# generator's tanh output layer).
x_train = x_train.astype('float32') / 255.0
x_train = (x_train - 0.5) / 0.5

# Training / sampling settings
latent_dim = 100
num_epochs = 100
batch_size = 64
sample_interval = 1000
output_dir = 'gan_output'

# Generated samples and saved models are written here.
os.makedirs(output_dir, exist_ok=True)

# Generator model
def build_generator(noise_dim=None, img_shape=(30, 40, 3)):
    """Build the fully connected generator network.

    A noise vector is passed through three Dense blocks (256, 512 and 1024
    units, each followed by LeakyReLU(0.2) and BatchNormalization) and then
    projected to a tanh-activated image, so output values lie in [-1, 1].

    Args:
        noise_dim: Length of the input noise vector. ``None`` (the default)
            uses the notebook-level ``latent_dim``, which is what the
            previous zero-argument version always did.
        img_shape: Shape of one generated image. Defaults to ``(30, 40, 3)``,
            the value that was previously hard-coded.

    Returns:
        An uncompiled ``Sequential`` model mapping ``(noise_dim,)`` inputs to
        ``img_shape`` outputs.
    """
    if noise_dim is None:
        noise_dim = latent_dim  # notebook-level setting

    model = models.Sequential()
    model.add(layers.Input(shape=(noise_dim,)))
    for units in (256, 512, 1024):
        model.add(layers.Dense(units))
        model.add(layers.LeakyReLU(negative_slope=0.2))
        model.add(layers.BatchNormalization(momentum=0.8))
    # One output unit per pixel value, reshaped back into an image.
    model.add(layers.Dense(int(np.prod(img_shape)), activation='tanh'))
    model.add(layers.Reshape(tuple(img_shape)))
    return model

# Discriminator model
def build_discriminator(img_shape=(30, 40, 3)):
    """Build the fully connected discriminator network.

    The image is flattened, passed through two Dense blocks (512 and 256
    units, each followed by LeakyReLU(0.2)) and reduced to a single sigmoid
    output.

    Args:
        img_shape: Shape of one input image. Defaults to ``(30, 40, 3)``,
            the value that was previously hard-coded.

    Returns:
        An uncompiled ``Sequential`` model mapping ``img_shape`` inputs to a
        single value in (0, 1).
    """
    model = models.Sequential()
    model.add(layers.Input(shape=tuple(img_shape)))
    model.add(layers.Flatten())
    for units in (512, 256):
        model.add(layers.Dense(units))
        model.add(layers.LeakyReLU(negative_slope=0.2))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

# Building the Model Components and Training
We build the two model components, the discriminator and the generator, and chain them into a combined model. We then train the GAN using the parameters defined below.

In [None]:
# Start time: wall-clock reference used by the elapsed-time report that is
# printed in a later cell.
start_time = time.time()

# These override the batch_size / num_epochs values assigned in the
# parameters section earlier in the notebook (num_epochs was 100 there).
batch_size = 64
num_epochs = 512

# Compile models
# The discriminator is compiled on its own so it can be trained directly with
# train_on_batch; accuracy is tracked alongside the loss.
discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Create the combined model: noise -> generator -> discriminator -> validity.
generator = build_generator()
z = layers.Input(shape=(latent_dim,))
img = generator(z)
# Mark the discriminator as non-trainable before it is wired into the
# combined model, so that training `combined` is meant to update only the
# generator.
# NOTE(review): the flag is flipped AFTER discriminator.compile() and the
# discriminator is still trained stand-alone in the loop below. Whether those
# stand-alone updates still take effect depends on how the installed Keras
# version handles `trainable` changes made after compile() -- TODO confirm.
discriminator.trainable = False
validity = discriminator(img)

combined = models.Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer='adam')

# Sampling helper used during training
def sample_images(generator, epoch, rows=5, cols=5):
    """Save a ``rows`` x ``cols`` grid of generated images as a PNG.

    Noise is drawn from a standard normal distribution, pushed through
    ``generator`` and rescaled from [-1, 1] to [0, 1] for display. The figure
    is written to ``{output_dir}/gan_generated_epoch_{epoch}.png`` and then
    closed; nothing is returned.

    Args:
        generator: Model mapping ``(n, latent_dim)`` noise to images.
        epoch: Value embedded in the output file name.
        rows: Number of grid rows.
        cols: Number of grid columns.
    """
    grid_size = rows * cols
    latent_batch = np.random.normal(0, 1, (grid_size, latent_dim))
    samples = 0.5 * generator.predict(latent_batch) + 0.5  # [-1, 1] -> [0, 1]

    plt.figure(figsize=(10, 10))
    for cell in range(1, grid_size + 1):
        plt.subplot(rows, cols, cell)
        plt.imshow(samples[cell - 1])
        plt.axis('off')
    plt.savefig(f"{output_dir}/gan_generated_epoch_{epoch}.png")
    plt.close()

# Optionally set up TensorBoard logging.
# NOTE: this callback object is not handed to anything in the manual training
# loop below; the scalars are written through the tf.summary writer instead.
tensorboard_callback = callbacks.TensorBoard(log_dir='logs', histogram_freq=1)

# A single summary writer for the whole run (opening a new writer on every
# logging step is unnecessary).
summary_writer = tf.summary.create_file_writer('logs')

# Target labels never change between steps, so build them once.
real_labels = np.ones((batch_size, 1))
fake_labels = np.zeros((batch_size, 1))

# Training
# Each "epoch" here is ONE update step on a randomly drawn batch, not a full
# pass over x_train.
for epoch in range(num_epochs):
    # Train Discriminator on one real batch and one generated batch
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_imgs = x_train[idx]

    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0 keeps predict() from printing a progress bar on every step.
    fake_imgs = generator.predict(noise, verbose=0)

    d_loss_real = discriminator.train_on_batch(real_imgs, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_imgs, fake_labels)

    print(f"d_loss_real: {d_loss_real}, d_loss_fake: {d_loss_fake}")

    # Element-wise mean of [loss, accuracy] over the real and fake batches.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train Generator: the combined model is asked to label fakes as real.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = combined.train_on_batch(noise, real_labels)

    # Check g_loss structure
    print(f"g_loss: {g_loss}")

    # train_on_batch may hand back a bare scalar (Python or NumPy) or a
    # sequence whose first entry is the loss. np.ravel covers all of these;
    # plain g_loss[0] fails on a NumPy scalar such as np.float32.
    g_loss_value = float(np.ravel(g_loss)[0])

    # Print progress
    if epoch % sample_interval == 0:
        d_loss_value = float(d_loss[0])
        d_accuracy = float(d_loss[1])
        print(f"Epoch {epoch} | D loss: {d_loss_value:.4f}, D accuracy: {100 * d_accuracy:.2f}% | G loss: {g_loss_value:.4f}")
        sample_images(generator, epoch)

        # Log to TensorBoard
        with summary_writer.as_default():
            tf.summary.scalar('D loss', d_loss_value, step=epoch)
            tf.summary.scalar('D accuracy', d_accuracy, step=epoch)
            tf.summary.scalar('G loss', g_loss_value, step=epoch)

# Make sure everything logged above reaches disk.
summary_writer.flush()

# Save models
generator.save(f"{output_dir}/generator_model.h5")
discriminator.save(f"{output_dir}/discriminator_model.h5")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step 




d_loss_real: [array(0.38887602, dtype=float32), array(1., dtype=float32)], d_loss_fake: [array(0.5477079, dtype=float32), array(0.7109375, dtype=float32)]
g_loss: [array(0.5477079, dtype=float32), array(0.5477079, dtype=float32), array(0.7109375, dtype=float32)]
Epoch 0 | D loss: 0.4683, D accuracy: 85.55% | G loss: 0.5477
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step




d_loss_real: [array(0.49468565, dtype=float32), array(0.8072917, dtype=float32)], d_loss_fake: [array(0.576751, dtype=float32), array(0.625, dtype=float32)]




g_loss: [array(0.576751, dtype=float32), array(0.576751, dtype=float32), array(0.625, dtype=float32)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
d_loss_real: [array(0.53878516, dtype=float32), array(0.7, dtype=float32)], d_loss_fake: [array(0.61037797, dtype=float32), array(0.5833333, dtype=float32)]
g_loss: [array(0.61037797, dtype=float32), array(0.61037797, dtype=float32), array(0.5833333, dtype=float32)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
d_loss_real: [array(0.58121777, dtype=float32), array(0.64285713, dtype=float32)], d_loss_fake: [array(0.66308737, dtype=float32), array(0.5625, dtype=float32)]
g_loss: [array(0.66308737, dtype=float32), array(0.66308737, dtype=float32), array(0.5625, dtype=float32)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
d_loss_real: [array(0.63277197, dtype=float32), array(0.6111111, dtype=float32)], d_loss_fake: [array(0.7402639, dtype=float32), array(0.55, dtype=

# Image generation
We generate sample images with the trained generator and display them next to a batch of real images.

In [None]:
def display_original_and_generated_images(generator, train_ds, num_images):
    """Show real and generated images in grayscale with a per-pair MSE.

    One batch is taken from ``train_ds`` and ``num_images`` samples are drawn
    from ``generator``. The i-th real image is paired with the i-th generated
    image. The pairing is arbitrary (the generator is fed random noise, not
    the real image), so the MSE is only a rough pixel-level dissimilarity
    figure, not a reconstruction error.

    Both image sets are brought to the [0, 1] range before comparison, so the
    reported MSE values also lie in [0, 1].

    Args:
        generator: Model mapping ``(n, latent_dim)`` noise to RGB images with
            values in [-1, 1].
        train_ds: Dataset yielding ``(images, labels)`` batches. Images are
            assumed to be RGB with pixel values in [0, 255], the same
            assumption the training data preparation makes when it divides
            by 255.
        num_images: Number of image pairs to show. Values larger than the
            batch size are reduced to the batch size.

    Raises:
        ValueError: If ``train_ds`` yields no batches.
    """
    # Take exactly one batch of real images; fail with a clear message if the
    # dataset is empty instead of hitting an unbound variable later.
    first_batch = next(iter(train_ds.take(1)), None)
    if first_batch is None:
        raise ValueError("train_ds yielded no batches")

    # Scale the real images to [0, 1] so they are comparable with the
    # rescaled generator output (otherwise the MSE mixes 0-255 and 0-1 data).
    original_images = first_batch[0].numpy().astype("float32") / 255.0

    # Never index past the end of the batch.
    num_images = min(num_images, len(original_images))
    if num_images < 1:
        print("No images to compare.")
        return

    # Generate images from the GAN
    noise = np.random.normal(0, 1, (num_images, latent_dim))
    generated_imgs = generator.predict(noise, verbose=0)
    generated_imgs = (0.5 * generated_imgs + 0.5).astype("float32")  # [-1, 1] -> [0, 1]

    mse_values = []

    plt.figure(figsize=(20, 8))

    for i in range(num_images):
        # Both image sets are in RGB channel order, so use the RGB
        # conversion code (BGR2GRAY would swap the red and blue weights).
        original = cv2.cvtColor(original_images[i], cv2.COLOR_RGB2GRAY)
        generated = cv2.cvtColor(generated_imgs[i], cv2.COLOR_RGB2GRAY)

        # Compute MSE on the grayscale pair
        mse = float(np.mean((original - generated) ** 2))
        mse_values.append(mse)

        # Display original image (explicit gray colormap and fixed range so
        # brightness is comparable across panels)
        ax = plt.subplot(3, num_images, i + 1)
        plt.imshow(original, cmap="gray", vmin=0.0, vmax=1.0)
        plt.title(f"\nOriginal {i+1}")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # Display generated image (these are GAN samples, not
        # reconstructions of the originals)
        ax = plt.subplot(3, num_images, i + 1 + num_images)
        plt.imshow(generated, cmap="gray", vmin=0.0, vmax=1.0)
        plt.title(f"\nGenerated {i+1}")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # Display MSE value
        ax = plt.subplot(3, num_images, i + 1 + 2 * num_images)
        plt.text(0.5, 0.5, f"MSE: {mse:.4f}", fontsize=12, ha='center', va='center')
        plt.axis('off')

    # Adjust layout and show the plot
    plt.tight_layout()
    plt.show()

    # Print MSE values for reference
    for i, mse in enumerate(mse_values, 1):
        print(f"Image {i}: MSE = {mse:.4f}")

    # Compute the average MSE across all images
    average_mse = np.mean(mse_values)
    print(f"\nAverage MSE: {average_mse:.4f}")



# Report the run settings, then the wall-clock time elapsed since start_time
# was recorded at the top of the training cell.
print(f"batch_size = {batch_size}", f"epochs = {num_epochs}", sep="\n")
end_time = time.time()
elapsed_time = end_time - start_time
print(f"time = {elapsed_time:.2f} seconds")

In [None]:
# Pull one batch of real images into the notebook namespace.
# Note: the display function below draws its own batch from train_ds; this
# variable is not passed to it.
original_images, _ = next(iter(train_ds.take(1)))

display_original_and_generated_images(generator, train_ds, num_images=6)