<a href="https://colab.research.google.com/github/Rtniewi/kcwiertniewicz-IDS/blob/main/Assignment3_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

//***Katrina Cwiertniewicz
//*** CSC 330
//10/--/2024
//Assignment 3: VAE on the SVHN Dataset
#### The purpose of this assignment is to create and train a Variational Autoencoder (VAE) in Keras that learns representations of the Street View House Numbers (SVHN) dataset, and to explore how its performance changes with different latent dimensions.

In [30]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import (
    layers,
    models,
    datasets,
    callbacks,
    losses,
    optimizers,
    metrics,
)

from scipy.stats import norm

## 0. Parameters <a name="parameters"></a>

In [31]:
# Tunable settings shared by the cells below.
IMAGE_SIZE = 32  # height and width (pixels) of the encoder input built in build_vae
BATCH_SIZE = 128  # mini-batch size passed to vae.fit
VALIDATION_SPLIT = 0.2  # NOTE(review): not referenced in the visible cells - confirm it is still needed
EPOCHS = 50  # number of epochs passed to vae.fit
BETA = 500  # multiplier applied to the binary cross-entropy (reconstruction) term of the VAE loss

# Task 1: Load and Preprocess the SVHN Dataset

In [32]:
!wget http://ufldl.stanford.edu/housenumbers/train_32x32.mat
!wget http://ufldl.stanford.edu/housenumbers/test_32x32.mat

--2024-10-09 15:10:18--  http://ufldl.stanford.edu/housenumbers/train_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 182040794 (174M) [text/plain]
Saving to: ‘train_32x32.mat.1’


2024-10-09 15:11:36 (2.22 MB/s) - ‘train_32x32.mat.1’ saved [182040794/182040794]

--2024-10-09 15:11:36--  http://ufldl.stanford.edu/housenumbers/test_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64275384 (61M) [text/plain]
Saving to: ‘test_32x32.mat.1’


2024-10-09 15:12:02 (2.46 MB/s) - ‘test_32x32.mat.1’ saved [64275384/64275384]



In [33]:
from scipy.io import loadmat


def _to_unit_float_images(raw_images):
    """Move the last axis to the front and scale values by 1/255.

    The arrays stored under 'X' end up as (N, 32, 32, 3) after the transpose,
    as confirmed by the shapes printed at the end of this cell.
    """
    sample_first = np.transpose(raw_images, (3, 0, 1, 2))
    return sample_first.astype('float32') / 255.0


# Read the two MATLAB files from the working directory
train_data = loadmat('train_32x32.mat')
test_data = loadmat('test_32x32.mat')

# Images become float32 arrays in [0, 1]; labels become flat 1-D vectors.
# NOTE(review): the SVHN format is documented as using label 10 for the
# digit 0 - confirm before treating y_* as the literal digit value.
x_train = _to_unit_float_images(train_data['X'])
y_train = train_data['y'].flatten()
x_test = _to_unit_float_images(test_data['X'])
y_test = test_data['y'].flatten()

# Report the resulting array shapes
print(f'Training data shape: {x_train.shape}')
print(f'Test data shape: {x_test.shape}')



Training data shape: (73257, 32, 32, 3)
Test data shape: (26032, 32, 32, 3)


# Task 2: Build a Variational Autoencoder

In [34]:
class Sampling(layers.Layer):
    """Layer that draws a latent vector from the Gaussian described by its inputs."""

    def call(self, inputs):
        """Return ``z_mean + exp(0.5 * z_log_var) * epsilon``.

        Args:
            inputs: pair ``(z_mean, z_log_var)``; both are indexed as 2-D
                tensors (batch, latent dimension) when the noise shape is built.

        Returns:
            A tensor of sampled latent vectors, where ``epsilon`` is noise
            drawn with ``K.random_normal``.
        """
        mean, log_var = inputs
        n_rows = tf.shape(mean)[0]
        n_cols = tf.shape(mean)[1]
        noise = K.random_normal(shape=(n_rows, n_cols))
        # exp(0.5 * log-variance) is the standard deviation
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

In [35]:
def build_vae(latent_dim):
    """Build the encoder and decoder networks and wrap them in a ``VAE``.

    Both sub-model summaries are printed as a side effect.

    Args:
        latent_dim: size of the latent vector produced by the encoder and
            consumed by the decoder.

    Returns:
        An uncompiled ``VAE`` instance holding the two sub-models.
    """
    # Settings shared by every strided (transposed) convolution below
    strided = dict(strides=2, activation="relu", padding="same")

    # ---- Encoder: three stride-2 convolutions, then two dense heads ----
    encoder_input = layers.Input(
        shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="encoder_input"
    )
    features = encoder_input
    for n_filters in (32, 64, 128):
        features = layers.Conv2D(n_filters, (3, 3), **strided)(features)

    # Remember the feature-map shape so the decoder can rebuild it
    shape_before_flattening = K.int_shape(features)[1:]

    flat = layers.Flatten()(features)
    z_mean = layers.Dense(latent_dim, name="z_mean")(flat)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(flat)
    z = Sampling()([z_mean, z_log_var])

    encoder = models.Model(
        encoder_input, [z_mean, z_log_var, z], name="encoder"
    )
    encoder.summary()

    # ---- Decoder: dense -> reshape -> three stride-2 transposed convs ----
    decoder_input = layers.Input(shape=(latent_dim,), name="decoder_input")
    upsampled = layers.Dense(np.prod(shape_before_flattening))(decoder_input)
    upsampled = layers.Reshape(shape_before_flattening)(upsampled)
    for n_filters in (128, 64, 32):
        upsampled = layers.Conv2DTranspose(n_filters, (3, 3), **strided)(
            upsampled
        )

    # Final stride-1 convolution maps to 3 channels with sigmoid activation
    decoder_output = layers.Conv2D(
        3,
        (3, 3),
        strides=1,
        activation="sigmoid",
        padding="same",
        name="decoder_output",
    )(upsampled)

    decoder = models.Model(decoder_input, decoder_output)
    decoder.summary()

    # Combine both halves into the trainable variational autoencoder
    return VAE(encoder, decoder)

In [36]:
class VAE(models.Model):
    """Variational autoencoder with custom training and validation steps.

    The loss is ``BETA * binary_crossentropy(input, reconstruction)`` plus the
    KL divergence term computed from ``z_mean`` and ``z_log_var``. Three
    ``metrics.Mean`` trackers record the running training losses.
    """

    def __init__(self, encoder, decoder, **kwargs):
        """Store the sub-models and create the loss trackers.

        Args:
            encoder: model returning ``(z_mean, z_log_var, z)`` for an image batch.
            decoder: model mapping ``z`` to a reconstruction.
            **kwargs: forwarded to ``models.Model.__init__``.
        """
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers whose results are reported by ``train_step``."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def call(self, inputs):
        """Encode ``inputs`` and decode the sampled latent vector.

        Returns:
            Tuple ``(z_mean, z_log_var, reconstruction)``.
        """
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstruction = self.decoder(z)
        return z_mean, z_log_var, reconstruction

    @staticmethod
    def _unpack_images(data):
        """Return the image batch whether ``data`` is a bare batch or an (x, ...) tuple."""
        if isinstance(data, tuple):
            return data[0]
        return data

    def _compute_losses(self, images):
        """Run a forward pass and return ``(total, reconstruction, kl)`` losses."""
        z_mean, z_log_var, reconstruction = self(images)
        # BETA (module-level constant) scales the reconstruction term
        reconstruction_loss = tf.reduce_mean(
            BETA
            * losses.binary_crossentropy(
                images, reconstruction, axis=(1, 2, 3)
            )
        )
        # KL term: summed over the latent axis, averaged over the batch
        kl_loss = tf.reduce_mean(
            tf.reduce_sum(
                -0.5
                * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)),
                axis=1,
            )
        )
        return reconstruction_loss + kl_loss, reconstruction_loss, kl_loss

    def train_step(self, data):
        """Step run during training.

        Accepts either a bare image batch or a tuple whose first element is
        the image batch, matching what ``test_step`` already accepts, so that
        ``fit(x)`` and ``fit(x, y)`` both work.

        Returns:
            Dict mapping each tracker name to its current running mean.
        """
        images = self._unpack_images(data)
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._compute_losses(
                images
            )

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Step run during validation.

        Returns:
            Dict with the ``loss``, ``reconstruction_loss`` and ``kl_loss``
            values computed for this batch (the trackers are not updated).
        """
        images = self._unpack_images(data)
        total_loss, reconstruction_loss, kl_loss = self._compute_losses(images)

        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
        }


# Task 3: Train the VAE with Different Latent Dimensions

In [None]:
def main(latent_dims=(2, 5, 10)):
    """Train one VAE per latent dimension and keep every trained model.

    Previously each iteration overwrote the single global ``vae``, so only the
    model for the last latent dimension was left to inspect. The models are
    now also collected in a dict so the different latent sizes can be compared.

    Args:
        latent_dims: iterable of latent sizes to train, in order.

    Returns:
        Dict mapping each latent dimension to its trained ``VAE``.

    Side effects:
        Sets the global ``vae`` to the most recently trained model, which is
        what the visualisation cells below use.
    """
    global vae
    trained_models = {}
    for dim in latent_dims:
        print(f'Training VAE with latent dimension: {dim}')
        vae = build_vae(latent_dim=dim)
        # VAE.train_step computes its own loss, so no `loss` argument is
        # needed here; passing one had no effect on training.
        vae.compile(optimizer='adam')
        vae.fit(
            x_train,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            validation_data=(x_test, x_test),
        )
        trained_models[dim] = vae
    return trained_models


vaes = main()

Training VAE with latent dimension: 2


Epoch 1/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m314s[0m 541ms/step - kl_loss: 1.6688 - reconstruction_loss: 330.0923 - total_loss: 331.7611 - val_kl_loss: 3.0453 - val_loss: 316.5638 - val_reconstruction_loss: 313.5186
Epoch 2/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 543ms/step - kl_loss: 2.7747 - reconstruction_loss: 318.5342 - total_loss: 321.3089 - val_kl_loss: 3.0398 - val_loss: 316.0790 - val_reconstruction_loss: 313.0392
Epoch 3/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m317s[0m 553ms/step - kl_loss: 2.8063 - reconstruction_loss: 318.2515 - total_loss: 321.0578 - val_kl_loss: 3.0230 - val_loss: 316.3757 - val_reconstruction_loss: 313.3527
Epoch 4/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 558ms/step - kl_loss: 2.8306 - reconstruction_loss: 318.1977 - total_loss: 321.0284 - val_kl_loss: 2.9726 - val_loss: 316.4064 - val_reconstruction_loss: 313.4338
Epoch 5/50
[1m573/573[0m [32m

Epoch 1/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 544ms/step - kl_loss: 1.8279 - reconstruction_loss: 330.5837 - total_loss: 332.4117 - val_kl_loss: 4.1474 - val_loss: 315.3253 - val_reconstruction_loss: 311.1779
Epoch 2/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m313s[0m 528ms/step - kl_loss: 4.0964 - reconstruction_loss: 315.4297 - total_loss: 319.5260 - val_kl_loss: 4.3906 - val_loss: 314.9046 - val_reconstruction_loss: 310.5140
Epoch 3/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 509ms/step - kl_loss: 4.2284 - reconstruction_loss: 315.0374 - total_loss: 319.2657 - val_kl_loss: 4.5535 - val_loss: 314.8383 - val_reconstruction_loss: 310.2848
Epoch 4/50
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m334s[0m 530ms/step - kl_loss: 4.4138 - reconstruction_loss: 314.7990 - total_loss: 319.2127 - val_kl_loss: 4.7003 - val_loss: 313.8398 - val_reconstruction_loss: 309.1394
Epoch 5/50
[1m573/573[0m [32m

# Task 4: Visualize the Results

# Reconstruct using the variational autoencoder

In [None]:
# Use the first n_to_predict test images (and their labels) for all plots below
n_to_predict = 5000
example_images, example_labels = (
    x_test[:n_to_predict],
    y_test[:n_to_predict],
)

In [None]:
# Create autoencoder predictions and display
z_mean, z_log_var, reconstructions = vae.predict(example_images)


def show_images(images, n=10):
    """Plot the first ``n`` images of an (N, H, W, C) array in a single row.

    The notebook-wide ``display`` only prints the array repr for a NumPy
    array, so the pictures are drawn with matplotlib instead.
    """
    n = min(n, len(images))
    if n == 0:
        return
    fig, axes = plt.subplots(1, n, figsize=(2 * n, 2), squeeze=False)
    for ax, image in zip(axes[0], images[:n]):
        ax.imshow(image)
        ax.axis("off")
    plt.show()


print("Examples of real house numbers")
show_images(example_images)
print("Reconstructions")
show_images(reconstructions)

# Embed using the encoder

In [None]:
# Encode the example images.
# The encoder outputs are [z_mean, z_log_var, z]; note that the variable named
# z_var therefore holds the log-variance, not the variance itself.
z_mean, z_var, z = vae.encoder.predict(example_images)

In [None]:
# Some examples of the embeddings: the sampled latent vectors of the first 10 example images
print(z[:10])

In [None]:
# Show the encoded points in 2D space (first two latent coordinates only)
figsize = 8

fig, ax = plt.subplots(figsize=(figsize, figsize))
latent_x, latent_y = z[:, 0], z[:, 1]
ax.scatter(latent_x, latent_y, c="black", alpha=0.5, s=3)
plt.show()

# Generate using the decoder

In [None]:
# Sample some points in the latent space, from the standard normal distribution
grid_width, grid_height = (6, 3)
# Take the latent size from the encoder output `z` (one column per latent
# dimension) rather than hard-coding 10, so this cell works for whichever
# model is currently held in `vae`.
latent_dim = z.shape[1]
z_sample = np.random.normal(size=(grid_width * grid_height, latent_dim))

In [None]:
# Decode the sampled points into images (one image per row of z_sample)
reconstructions = vae.decoder.predict(z_sample)

In [None]:
# Convert original embeddings and sampled embeddings to p-values:
# apply the standard normal CDF element-wise, mapping each latent
# coordinate into the interval [0, 1].
p = norm.cdf(z)
p_sample = norm.cdf(z_sample)

In [None]:
sample = norm.cdf(z_sample)

# First figure: encoded test images (black) overlaid with the randomly
# sampled latent points (blue), using the first two latent coordinates.
figsize = 8
plt.figure(figsize=(figsize, figsize))
plt.scatter(z[:, 0], z[:, 1], c="black", alpha=0.5, s=2)
plt.scatter(z_sample[:, 0], z_sample[:, 1], c="#00B0F0", alpha=1, s=40)
plt.show()

# Second figure: the decoded image for every sampled point, each captioned
# with its latent vector rounded to one decimal place.
fig = plt.figure(figsize=(figsize, grid_height * 2))
fig.subplots_adjust(hspace=0.4, wspace=0.4)

n_cells = grid_width * grid_height
for i in range(n_cells):
    caption = str(np.round(z_sample[i, :], 1))
    ax = fig.add_subplot(grid_height, grid_width, i + 1)
    ax.axis("off")
    ax.text(
        0.5, -0.35, caption, fontsize=10, ha="center", transform=ax.transAxes
    )
    ax.imshow(reconstructions[i, :, :], cmap="Greys")

# Explore the latent space

In [None]:
# Colour the embeddings by their label (the SVHN class label of each example image).
# Left panel: first two latent coordinates of z, with a colour bar.
# Right panel: the same points after the normal-CDF transform (p).
figsize = 8
fig = plt.figure(figsize=(figsize * 2, figsize))
ax = fig.add_subplot(1, 2, 1)
plot_1 = ax.scatter(
    z[:, 0], z[:, 1], cmap="rainbow", c=example_labels, alpha=0.8, s=3
)
plt.colorbar(plot_1)
ax = fig.add_subplot(1, 2, 2)
plot_2 = ax.scatter(
    p[:, 0], p[:, 1], cmap="rainbow", c=example_labels, alpha=0.8, s=3
)
plt.show()

In [None]:
# Colour the embeddings by their label (the SVHN class label of each example image)
figsize = 12
grid_size = 15
plt.figure(figsize=(figsize, figsize))
plt.scatter(
    p[:, 0], p[:, 1], cmap="rainbow", c=example_labels, alpha=0.8, s=300
)
plt.colorbar()

# Build an evenly spaced grid in probability space and map it back to latent
# space. The endpoints stay strictly inside (0, 1) because norm.ppf(0) and
# norm.ppf(1) are -inf / +inf, which would feed non-finite values to the decoder.
x = norm.ppf(np.linspace(0.01, 0.99, grid_size))
y = norm.ppf(np.linspace(0.99, 0.01, grid_size))
xv, yv = np.meshgrid(x, y)
xv = xv.flatten()
yv = yv.flatten()

# The grid spans the first two latent coordinates; any remaining coordinates
# are held at 0 so the input width matches the decoder of the current model.
latent_dim = z.shape[1]
grid = np.zeros((grid_size**2, latent_dim))
grid[:, 0] = xv
grid[:, 1] = yv

# `decoder` is not a notebook-level name; the decoder lives on the VAE object.
reconstructions = vae.decoder.predict(grid)
plt.show()

# Draw the decoded image for every grid point
fig = plt.figure(figsize=(figsize, figsize))
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for i in range(grid_size**2):
    ax = fig.add_subplot(grid_size, grid_size, i + 1)
    ax.axis("off")
    ax.imshow(reconstructions[i, :, :])