<a href="https://colab.research.google.com/github/Enkrumah14/mannyNkrumahGenAi/blob/main/Problem1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#author : Manny Nkrumah
#file : Problem1.ipynb
#assignment # : #2
#date : 10/11/24
#description : Trains a Variational Autoencoder (VAE) built with TensorFlow/Keras on the SVHN (Street View House Numbers) dataset for latent dimensions 2, 5 and 10, then plots original vs. reconstructed test images.

##############################################################################################################################
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

# Read the SVHN train/test .mat archives from the working directory
train_data = loadmat('train_32x32.mat')
test_data = loadmat('test_32x32.mat')


def _images_first(mat_dict):
    """Return mat_dict['X'] with its last axis moved to the front, as float32 divided by 255."""
    sample_major = np.transpose(mat_dict['X'], (3, 0, 1, 2))
    # Dividing by 255 presumably maps 8-bit pixel values into [0, 1].
    return sample_major.astype('float32') / 255.0


# Images end up as (N, 32, 32, 3) (see the shapes printed below); labels become 1-D arrays.
x_train, x_test = _images_first(train_data), _images_first(test_data)
y_train, y_test = train_data['y'].flatten(), test_data['y'].flatten()

print(f'Training data shape: {x_train.shape}')
print(f'Test data shape: {x_test.shape}')

# Sampling layer (Reparameterization trick)
class Sampling(layers.Layer):
    """Layer that draws a latent sample from (z_mean, z_log_var) via the reparameterization trick."""

    def call(self, inputs):
        """Return ``z_mean + exp(0.5 * z_log_var) * epsilon`` with standard-normal ``epsilon``.

        ``inputs`` is the pair ``(z_mean, z_log_var)``. The noise tensor is
        shaped from the first two dimensions of ``z_mean``.
        """
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log(var)) is the standard deviation.
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

# Build the encoder
def build_encoder(input_shape, latent_dim):
    """Assemble the convolutional encoder as a Keras functional model.

    Args:
        input_shape: Shape of one input image, without the batch axis.
        latent_dim: Number of latent dimensions.

    Returns:
        A model named 'encoder' with outputs ``[z_mean, z_log_var, z]``, where
        ``z`` is produced from the other two by ``Sampling``.
    """
    image = layers.Input(shape=input_shape)

    # Two stride-2 'same' convolutions: 32 filters, then 64.
    features = image
    for n_filters in (32, 64):
        conv = layers.Conv2D(n_filters, 3, activation='relu', strides=2, padding='same')
        features = conv(features)

    # Flatten, then squeeze through a 16-unit dense layer shared by both latent heads.
    flat = layers.Flatten()(features)
    hidden = layers.Dense(16, activation='relu')(flat)

    z_mean = layers.Dense(latent_dim, name='z_mean')(hidden)
    z_log_var = layers.Dense(latent_dim, name='z_log_var')(hidden)
    z = Sampling()([z_mean, z_log_var])

    return models.Model(image, [z_mean, z_log_var, z], name='encoder')

# Build the decoder
def build_decoder(latent_dim):
    """Assemble the transposed-convolution decoder as a Keras functional model.

    Args:
        latent_dim: Number of latent dimensions expected as input.

    Returns:
        A model named 'decoder' mapping a latent vector to a 3-channel image
        with sigmoid-activated values.
    """
    code = layers.Input(shape=(latent_dim,))

    # Project the latent vector to an 8x8x64 feature map.
    height, width, channels = 8, 8, 64
    feature_map = layers.Dense(height * width * channels, activation='relu')(code)
    feature_map = layers.Reshape((height, width, channels))(feature_map)

    # Two stride-2 'same' transposed convolutions: 64 filters, then 32.
    for n_filters in (64, 32):
        upsample = layers.Conv2DTranspose(n_filters, 3, activation='relu', strides=2, padding='same')
        feature_map = upsample(feature_map)

    # Final stride-1 layer produces the 3 output channels.
    image = layers.Conv2DTranspose(3, 3, activation='sigmoid', padding='same')(feature_map)
    return models.Model(code, image, name='decoder')

# Functions to create encoder and decoder
def create_encoder(latent_dim):
    """Return an encoder for 32x32 three-channel images with ``latent_dim`` latent dimensions."""
    image_shape = (32, 32, 3)
    return build_encoder(image_shape, latent_dim)

def create_decoder(latent_dim):
    """Return a decoder that takes latent vectors of size ``latent_dim``."""
    decoder_model = build_decoder(latent_dim)
    return decoder_model

# Complete VAE model
class VAE(models.Model):
    """Variational autoencoder combining an encoder and a decoder into one trainable model.

    The loss (reconstruction binary cross-entropy plus KL divergence) is
    computed inside ``train_step`` and ``test_step``, so it does not depend on
    the ``loss`` argument given to ``compile``.

    Args:
        encoder: Model mapping an image batch to ``(z_mean, z_log_var, z)``.
        decoder: Model mapping a latent batch ``z`` to reconstructed images.
        **kwargs: Forwarded to ``models.Model``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        # Running means of the three loss terms, reported in the progress bar.
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Loss trackers exposed to Keras so it can manage (e.g. reset) them."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    @staticmethod
    def _images_from(data):
        """Return the image batch from ``data``, which may be ``x`` or an ``(x, ...)`` tuple.

        ``fit(x)`` and ``validation_data=(x, y)`` can hand the step functions
        differently structured batches; only the inputs are needed here.
        """
        if isinstance(data, (tuple, list)):
            return data[0]
        return data

    def _vae_losses(self, images):
        """Return ``(total_loss, reconstruction_loss, kl_loss)`` for one image batch."""
        z_mean, z_log_var, z = self.encoder(images)
        reconstruction = self.decoder(z)
        # binary_crossentropy reduces the last (channel) axis; the per-pixel
        # values are then summed over the two spatial axes and averaged over
        # the batch.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                tf.keras.losses.binary_crossentropy(images, reconstruction),
                axis=(1, 2),
            )
        )
        # KL divergence between N(z_mean, exp(z_log_var)) and N(0, I),
        # summed over latent dimensions and averaged over the batch.
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
        return reconstruction_loss + kl_loss, reconstruction_loss, kl_loss

    def _record_losses(self, total_loss, reconstruction_loss, kl_loss):
        """Update the trackers and return their current values as a logs dict."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, data):
        """Run one optimization step on a batch and return the running loss values."""
        images = self._images_from(data)
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._vae_losses(images)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return self._record_losses(total_loss, reconstruction_loss, kl_loss)

    def test_step(self, data):
        """Evaluate one batch (no weight update) and return the running loss values.

        Without this override, validation goes through Keras' stock test step,
        which never updates the three trackers, so every reported ``val_*``
        value stays at 0. With it, validation reports ``val_loss``,
        ``val_reconstruction_loss`` and ``val_kl_loss``.
        """
        images = self._images_from(data)
        total_loss, reconstruction_loss, kl_loss = self._vae_losses(images)
        return self._record_losses(total_loss, reconstruction_loss, kl_loss)

    def call(self, inputs):
        """Return the reconstruction of ``inputs`` decoded from a sampled latent ``z``."""
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)
        return reconstructed

# VAE model training code: one independent model per latent size
latent_dims = [2, 5, 10]
vae_models = {}  # latent dimension -> trained VAE
histories = {}   # latent dimension -> History object returned by fit()
for dim in latent_dims:
    print(f'Training VAE with latent dimension: {dim}')

    # Callbacks for model checkpoint and early stopping.
    # They are created fresh for every run, and each run writes its own file,
    # so one latent size can neither overwrite another's checkpoint nor
    # inherit its best-loss bookkeeping.
    checkpoint_callback = ModelCheckpoint(
        f'vae_model_latent{dim}.keras', save_best_only=True, monitor='loss', mode='min', verbose=1
    )
    early_stopping_callback = EarlyStopping(monitor='loss', patience=10, verbose=1)

    # Create encoder and decoder for the given latent dimension
    encoder = create_encoder(dim)
    decoder = create_decoder(dim)

    # The VAE computes its own loss in train_step; the placeholder loss is
    # kept from the original compile call.
    vae = VAE(encoder, decoder)
    vae.compile(optimizer=tf.keras.optimizers.Adam(), loss=lambda y_true, y_pred: 0.0)  # Dummy loss

    # Train the VAE model
    history = vae.fit(
        x_train,  # Inputs only: the reconstruction target is the input itself
        epochs=50,
        batch_size=128,
        validation_data=(x_test, x_test),  # Test images serve as both input and target
        callbacks=[checkpoint_callback, early_stopping_callback]
    )

    # Keep every run; `vae` and `history` still refer to the last one afterwards.
    vae_models[dim] = vae
    histories[dim] = history

# Function to plot reconstructions
def plot_reconstruction(model, data):
    """Encode ``data`` and decode the third encoder output, returning the decoded images.

    Despite the name, nothing is drawn here; the caller plots the result.

    Args:
        model: Object exposing ``encoder`` and ``decoder``, each with ``predict``.
        data: Batch of inputs passed to ``model.encoder.predict``.

    Returns:
        Whatever ``model.decoder.predict`` returns for the latent batch.
    """
    _mean, _log_var, latent_sample = model.encoder.predict(data)
    return model.decoder.predict(latent_sample)

# Show the first test images (top row) above their reconstructions (bottom row),
# using the model left in `vae` by the training loop.
n = 10  # Number of images to display
reconstructed = plot_reconstruction(vae, x_test[:10])
plt.figure(figsize=(20, 4))
panel_rows = (("Original", x_test), ("Reconstructed", reconstructed))
for i in range(n):
    for row, (label, images) in enumerate(panel_rows):
        # Subplot indices are 1-based: row 0 fills 1..n, row 1 fills n+1..2n.
        ax = plt.subplot(2, n, row * n + i + 1)
        plt.imshow(images[i])
        plt.title(label)
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()


Training data shape: (73257, 32, 32, 3)
Test data shape: (26032, 32, 32, 3)
Training VAE with latent dimension: 2
Epoch 1/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step - kl_loss: 2.3668 - loss: 692.5465 - reconstruction_loss: 690.1796
Epoch 1: loss improved from inf to 680.63116, saving model to vae_model.keras
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 261ms/step - kl_loss: 2.3696 - loss: 692.5258 - reconstruction_loss: 690.1561 - val_kl_loss: 0.0000e+00 - val_reconstruction_loss: 0.0000e+00 - val_total_loss: 0.0000e+00
Epoch 2/50



  return {key: serialize_keras_object(value) for key, value in obj.items()}


[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step - kl_loss: 4.5048 - loss: 664.9999 - reconstruction_loss: 660.4954
Epoch 2: loss improved from 680.63116 to 663.33160, saving model to vae_model.keras
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 263ms/step - kl_loss: 4.5042 - loss: 664.9970 - reconstruction_loss: 660.4930 - val_kl_loss: 0.0000e+00 - val_reconstruction_loss: 0.0000e+00 - val_total_loss: 0.0000e+00
Epoch 3/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step - kl_loss: 3.4327 - loss: 660.7715 - reconstruction_loss: 657.3389
Epoch 3: loss improved from 663.33160 to 660.01318, saving model to vae_model.keras
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 259ms/step - kl_loss: 3.4324 - loss: 660.7702 - reconstruction_loss: 657.3378 - val_kl_loss: 0.0000e+00 - val_reconstruction_loss: 0.0000e+00 - val_total_loss: 0.0000e+00
Epoch 4/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0