# VAE Image generation

Cassandra Maldonado

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os

In [2]:
tf.random.set_seed(42)
np.random.seed(42)

In [3]:
# Parameters
latent_dim = 2
input_shape = (200, 200, 3)

In [4]:
# Defining the encoder
encoder_inputs = keras.Input(shape=input_shape)
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2D(128, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2D(256, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)

In [5]:
# Sampling layer, this layer will sample from the learned latent space using the mean and log variance from the encoder.
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = Sampling()([z_mean, z_log_var])

In [6]:
# Building the encoder model, it will take the input image and output the mean and log variance of the latent space and the sampled latent vector.
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()

In [7]:
# Defining the decoder, which will take the sampled latent vector and reconstruct the image.
# The decoder will reverse the operations of the encoder and it will take the latent vector and upsample it back to the original image size.
# After 4 Conv2D layers with strides=2, the 200x200 image is reduced to 200/(2^4) = 12.5
# We'll use 13x13 as the starting point for the decoder
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(13 * 13 * 256, activation="relu")(latent_inputs)
x = layers.Reshape((13, 13, 256))(x)
x = layers.Conv2DTranspose(128, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
# The final layer ensures we get exactly 200x200x3 output
decoder_outputs = layers.Conv2DTranspose(3, 3, activation="sigmoid", strides=2, padding="same", 
                                         output_padding=1)(x)


In [8]:
# Building the decoder model, it will take the latent vector and output the reconstructed image.
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()

In [10]:
# Defining the VAE model
# Define the VAE model as a custom model with a custom loss function
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            # Get encoder outputs
            z_mean, z_log_var, z = self.encoder(data)
            
            # Decode the latent vector
            reconstruction = self.decoder(z)
            
            # Compute reconstruction loss (ensuring shapes match)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.mean_squared_error(
                        tf.keras.layers.Flatten()(data),
                        tf.keras.layers.Flatten()(reconstruction)
                    ), axis=1
                )
            )
            
            # Compute KL divergence loss
            kl_loss = -0.5 * tf.reduce_mean(
                1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            )
            
            # Calculate total loss
            total_loss = reconstruction_loss + kl_loss
        
        # Compute gradients and update weights
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        
        # Update metrics
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

# VAE model
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())

In [11]:
# For demonstration purposes, create synthetic images
def create_synthetic_image(color=(255, 0, 0)):
    """Create a synthetic test image with a gradient"""
    img = np.zeros((250, 250, 3), dtype=np.float32)
    for i in range(250):
        for j in range(250):
            img[i, j, 0] = color[0] * (i / 250.0)  # Red channel
            img[i, j, 1] = color[1] * (j / 250.0)  # Green channel
            img[i, j, 2] = color[2] * ((i+j) / 500.0)  # Blue channel
    return np.expand_dims(img / 255.0, 0)  # Normalize and add batch dimension

In [12]:
# Create two different synthetic images
pic_1 = create_synthetic_image(color=(255, 100, 50))  # Reddish gradient
pic_2 = create_synthetic_image(color=(50, 100, 255))  # Bluish gradient

In [13]:
# Crop to 200x200 as specified
pic_1 = pic_1[:, -201:-1, 0:200, :]
pic_2 = pic_2[:, -201:-1, 0:200, :]

In [14]:
# Function to show images
def show_plot(images):
    plt.figure(figsize=(10, 5))
    for i in range(min(images.shape[0], 5)):  # Show up to 5 images
        plt.subplot(1, min(images.shape[0], 5), i + 1)
        img = np.clip(images[i], 0, 1)  # Ensure values are in [0,1]
        plt.imshow(img)
        plt.axis("off")
    plt.tight_layout()
    plt.show()

In [15]:
# 1. Train VAE on pic_1
print("Training VAE on pic_1...")
vae.fit(pic_1, epochs=50, batch_size=1)

Training VAE on pic_1...
Epoch 1/50


AttributeError: module 'keras._tf_keras.keras.losses' has no attribute 'mean_squared_error'

In [None]:
# 2. Sample the latent space
print("\nExplaining the Sampling function:")
print("""
The Sampling function is a crucial component of VAEs. It implements the 
"reparameterization trick" which allows backpropagation through randomness.

Given z_mean and z_log_var from the encoder:
1. First, it gets the batch size and latent dimension
2. Then it generates random noise (epsilon) from a normal distribution
3. Finally, it applies the formula: z = z_mean + exp(0.5 * z_log_var) * epsilon

This creates a sample from the learned distribution in the latent space
while keeping the operation differentiable for training.
""")