# Ladder Variational Autoencoder (Ladder VAE) Implementation
This notebook demonstrates the implementation of a Ladder VAE using the CIFAR-10 dataset. It includes the following steps:

1. Data Preparation
2. Encoder Definition
3. Decoder Definition
4. Ladder VAE Model
5. Training
6. Evaluation and Visualization

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt

## Step 1: Load and Preprocess CIFAR-10 Data

In [2]:
# Load CIFAR-10 dataset
(x_train, _), (x_test, _) = cifar10.load_data()

# Normalize images to the range [0, 1]
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Input dimensions
input_dim = x_train.shape[1:]  # (32, 32, 3)
print("Training data shape:", x_train.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step
Training data shape: (50000, 32, 32, 3)


## Step 2: Define the Encoder

In [3]:
class Encoder(Model):
    def __init__(self, latent_dims):
        super(Encoder, self).__init__()
        self.conv_layers = [
            layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2)),
        ]
        self.flatten = layers.Flatten()
        self.dense_mu = [layers.Dense(dim) for dim in latent_dims]
        self.dense_log_var = [layers.Dense(dim) for dim in latent_dims]

    def call(self, x):
        for conv in self.conv_layers:
            x = conv(x)
        x = self.flatten(x)
        mu, log_var = [], []
        for dense_mu, dense_log_var in zip(self.dense_mu, self.dense_log_var):
            mu.append(dense_mu(x))
            log_var.append(dense_log_var(x))
        return mu, log_var

## Step 3: Define the Decoder

In [4]:
class Decoder(Model):
    def __init__(self, latent_dims, output_dim):
        super(Decoder, self).__init__()
        self.dense_layers = [
            layers.Dense(8 * 8 * 64, activation='relu'),
            layers.Reshape((8, 8, 64)),
        ]
        self.deconv_layers = [
            layers.Conv2DTranspose(64, (3, 3), activation='relu', padding='same'),
            layers.UpSampling2D((2, 2)),
            layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same'),
            layers.UpSampling2D((2, 2)),
            layers.Conv2DTranspose(output_dim[-1], (3, 3), activation='sigmoid', padding='same'),
        ]

    def call(self, z):
        x = z[0]  # Start with the top latent variable
        for dense in self.dense_layers:
            x = dense(x)
        for deconv in self.deconv_layers:
            x = deconv(x)
        return x

## Step 4: Define the Ladder VAE

In [7]:
class LadderVAE(Model):
    def __init__(self, encoder, decoder, latent_dims):
        super(LadderVAE, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.latent_dims = latent_dims

    def reparameterize(self, mu, log_var):
        eps = tf.random.normal(shape=tf.shape(mu))
        return mu + tf.exp(0.5 * log_var) * eps

    def call(self, x):
        mu, log_var = self.encoder(x)
        z = [self.reparameterize(m, lv) for m, lv in zip(mu, log_var)]
        reconstruction = self.decoder(z)
        return reconstruction, mu, log_var

    # def compute_loss(self, x):
    #     reconstruction, mu, log_var = self.call(x)
    #     recon_loss = tf.reduce_mean(tf.reduce_sum(tf.keras.losses.binary_crossentropy(x, reconstruction), axis=(1, 2, 3)))
    #     kl_loss = sum([tf.reduce_mean(-0.5 * tf.reduce_sum(1 + lv - tf.square(m) - tf.exp(lv), axis=1)) for m, lv in zip(mu, log_var)])
    #     return recon_loss + kl_loss

    def compute_loss(self, x):
        reconstruction, mu, log_var = self.call(x)
        # The change is in the line below: axis is changed to (1, 2)
        recon_loss = tf.reduce_mean(tf.reduce_sum(tf.keras.losses.binary_crossentropy(x, reconstruction), axis=(1, 2)))
        kl_loss = sum([tf.reduce_mean(-0.5 * tf.reduce_sum(1 + lv - tf.square(m) - tf.exp(lv), axis=1)) for m, lv in zip(mu, log_var)])
        return recon_loss + kl_loss

## Step 5: Train the Ladder VAE

In [8]:
latent_dims = [128, 64, 32]
encoder = Encoder(latent_dims)
decoder = Decoder(latent_dims, input_dim)
ladder_vae = LadderVAE(encoder, decoder, latent_dims)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

epochs = 20
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices(x_train).shuffle(10000).batch(batch_size)

for epoch in range(epochs):
    epoch_loss = 0
    for batch in train_dataset:
        with tf.GradientTape() as tape:
            loss = ladder_vae.compute_loss(batch)
        grads = tape.gradient(loss, ladder_vae.trainable_weights)
        optimizer.apply_gradients(zip(grads, ladder_vae.trainable_weights))
        epoch_loss += loss
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(train_dataset):.4f}")

Epoch 1, Loss: 651.5876
Epoch 2, Loss: 632.0425
Epoch 3, Loss: 629.5007
Epoch 4, Loss: 628.2859
Epoch 5, Loss: 627.7608
Epoch 6, Loss: 627.3884
Epoch 7, Loss: 627.1360
Epoch 8, Loss: 626.9698
Epoch 9, Loss: 626.7973
Epoch 10, Loss: 626.6245
Epoch 11, Loss: 626.4365
Epoch 12, Loss: 626.3504
Epoch 13, Loss: 626.2387
Epoch 14, Loss: 626.0740
Epoch 15, Loss: 626.0377
Epoch 16, Loss: 626.0247
Epoch 17, Loss: 625.9297
Epoch 18, Loss: 625.8511
Epoch 19, Loss: 625.8021
Epoch 20, Loss: 625.7709


## Step 6: Evaluate and Visualize

In [None]:
def plot_reconstructions(model, x):
    reconstructions, _, _ = model.call(x)
    fig, axes = plt.subplots(2, 10, figsize=(20, 4))
    for i in range(10):
        axes[0, i].imshow(x[i])
        axes[1, i].imshow(reconstructions[i].numpy())
    plt.show()

plot_reconstructions(ladder_vae, x_test[:10])