In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses, Input, backend
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

## Using VAEs to Generate Faces
**What's different from the digits example:**
1. The faces data has 3 input channels (RGB) instead of just 1. This means that the final convolutional transpose layer of the decoder needs to have 3 channels. Started in colour, ends in colour.

2. There will be 200 dimensions in the latent space instead of just 2. More dimensions means more features/complexity/detail will be encoded.

3. Batch normalization after each convolution layer to speed up training. Dropout is also used to prevent overfitting.

4. The reconstruction loss factor is increased to 10000. 10000 was found to generate good results.  

5. We use a generator to feed images to the VAE from a folder, rather than loading all the images into memory first.

# Data

In [2]:
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image as PImage
from os import listdir

In [3]:
DATA_FOLDER = 'img_align_celeba_dir'
NUM_IMAGES = 202599
INPUT_SHAPE = (64, 64, 3)
BATCH_SIZE = 32
LATENT_DIM = 200

In [4]:
data_gen = ImageDataGenerator(rescale=1./255)

data_flow = data_gen.flow_from_directory(DATA_FOLDER,
                                         target_size = INPUT_SHAPE[:2],
                                         batch_size = BATCH_SIZE,
                                         shuffle = True,
                                         class_mode = 'input',
                                         interpolation = 'bilinear')

Found 202599 images belonging to 1 classes.


# Architecture

In [100]:
from numpy.core.fromnumeric import nonzero
class Autoencoder(Model):
  def __init__(self, encoder_input_shape, latent_dim):
    super(Autoencoder, self).__init__()
    self.encoder_input_shape = encoder_input_shape
    self.latent_dim = latent_dim
    self.shape_before_flattening = (4, 4, 64)
    self.dense1 = layers.Dense(self.latent_dim, name = 'mu')
    self.dense2 = layers.Dense(self.latent_dim, name = 'log_var')
    self.dense3 = layers.Dense(np.prod(self.shape_before_flattening))

    ### The encoder
    self.encoder = tf.keras.Sequential([
      layers.Input(shape=self.encoder_input_shape, name='encoder_input'),
      layers.Conv2D(filters = 32, kernel_size = (3, 3), strides = 2, padding = 'same'),
      layers.BatchNormalization(),
      layers.LeakyReLU(),
      layers.Dropout(rate = 0.25),
      layers.Conv2D(filters = 64, kernel_size = (3, 3), strides = 2, padding = 'same'),
      layers.BatchNormalization(),
      layers.LeakyReLU(),
      layers.Dropout(rate = 0.25),
      layers.Conv2D(filters = 64, kernel_size = (3, 3), strides = 2, padding = 'same'),
      layers.BatchNormalization(),
      layers.LeakyReLU(),
      layers.Dropout(rate = 0.25),
      layers.Conv2D(filters = 64, kernel_size = (3, 3), strides = 2, padding = 'same'),
      layers.BatchNormalization(),
      layers.LeakyReLU(),
      layers.Dropout(rate = 0.25)
    ])

    ### The decoder
    self.decoder = tf.keras.Sequential([
      layers.Conv2DTranspose(filters = 64, kernel_size = (3, 3), strides=2, padding = 'same'),
      layers.BatchNormalization(),
      layers.LeakyReLU(),
      layers.Dropout(rate = 0.25),
      layers.Conv2DTranspose(filters = 64, kernel_size = (3, 3), strides=2, padding = 'same'),
      layers.BatchNormalization(),
      layers.LeakyReLU(),
      layers.Dropout(rate = 0.25),
      layers.Conv2DTranspose(filters = 32, kernel_size = (3, 3), strides=2, padding = 'same'),
      layers.BatchNormalization(),
      layers.LeakyReLU(),
      layers.Dropout(rate = 0.25),
      layers.Conv2DTranspose(filters = 3,  kernel_size = (3, 3), strides=2, padding = 'same', activation = 'sigmoid')
    ])

  def call(self, x):
    encoded = self.encoder(x)
    shape_before_flattening = backend.int_shape(encoded)[1:]
    encoded = layers.Flatten()(encoded)
    mu = self.dense1(encoded)
    log_var = self.dense2(encoded)
    
    def sampling(args):
      # Samples a random point from the normal distribution (for a specific mu and log_var)
      func_mu, func_log_var = args
      epsilon = backend.random_normal(shape = backend.shape(func_mu), mean = 0, stddev = 1.) # random point in normal distribution
      return mu + backend.exp(func_log_var / 2) * epsilon

    encoder_output = layers.Lambda(sampling, name = 'encoder_output')([mu, log_var]) # samples a point from each distribution

    # Update shape_before_flattening before using it
    self.shape_before_flattening = shape_before_flattening
    
    # identical decoder to a plain autoencoder
    decoded = self.dense3(encoder_output)
    decoded = layers.Reshape(self.shape_before_flattening)(decoded)
    
    # sequential decoder 
    decoder_output = self.decoder(decoded)
    return decoder_output

In [101]:
# Build the model
VAE = Autoencoder(INPUT_SHAPE, LATENT_DIM)

# Training

In [102]:
# compilation
recon_loss_factor = 10000
def vae_loss(y_true, y_pred):
  RMSE_recon_loss = tf.math.reduce_mean(backend.square(y_true - y_pred))
  kl_loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
  return (recon_loss_factor * RMSE_recon_loss) + kl_loss

In [103]:
optimizer = Adam(learning_rate = 0.0005)
VAE.compile(optimizer = optimizer, loss = vae_loss)

In [None]:
for epoch in range(len(data_flow)):
  epoch_loss = []
  print(f"Epoch: {epoch}")
  batch_no = 0
  for batch in data_flow:
    batch_no += 1
    print(f'On batch {batch_no} / {len(data_flow)}')
    images = batch  # If input and output are the same, images serve as both input and target data
    with tf.GradientTape() as tape:
      reconstructed_images = VAE(images[0])
      loss = vae_loss(images[0], reconstructed_images)
      batch_loss = tf.reduce_mean(loss)
      epoch_loss.append(batch_loss)
    gradients = tape.gradient(batch_loss, VAE.trainable_variables)
    optimizer.apply_gradients(zip(gradients, VAE.trainable_variables))
  print(f'epoch_loss = {np.mean(epoch_loss)}')

# Generating New Faces

In [109]:
n_to_show = 30

new_output_dim = np.random.normal(size = (n_to_show, VAE.latent_dim))

reconst = VAE.decoder.predict(np.array(new_output_dim))

fig = plt.figure(figsize = (18, 5))
fig.subplots_adjust(hspace = 0.4, wspace = 0.4)
for i in range(n_to_show):
  ax = fig.add_subplot(3, 10, i + 1)
  ax.imshow(reconst[i, :, :, :])
  ax.axis('off')

plt.show()

ValueError: in user code:

    File "C:\Users\jorda\anaconda3\envs\GenerativeDL\lib\site-packages\keras\src\engine\training.py", line 2341, in predict_function  *
        return step_function(self, iterator)
    File "C:\Users\jorda\anaconda3\envs\GenerativeDL\lib\site-packages\keras\src\engine\training.py", line 2327, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\jorda\anaconda3\envs\GenerativeDL\lib\site-packages\keras\src\engine\training.py", line 2315, in run_step  **
        outputs = model.predict_step(data)
    File "C:\Users\jorda\anaconda3\envs\GenerativeDL\lib\site-packages\keras\src\engine\training.py", line 2283, in predict_step
        return self(x, training=False)
    File "C:\Users\jorda\anaconda3\envs\GenerativeDL\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\jorda\anaconda3\envs\GenerativeDL\lib\site-packages\keras\src\engine\input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_21' (type Sequential).
    
    Input 0 of layer "conv2d_transpose_40" is incompatible with the layer: expected ndim=4, found ndim=2. Full shape received: (None, 200)
    
    Call arguments received by layer 'sequential_21' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 200), dtype=float32)
      • training=False
      • mask=None
