# Chapter 3: Variational Autoencoders

An _autoencoder_ is an artificial neural network with two parts:

- An _encoder_ network which finds a representation of high-dimensional data in a lower-dimensional, or _latent_, space.

- A _decoder_ network which reconstructs samples of the original data from elements of the latent space.

### Your First Autoencoder

Below is an example of an autoencoder which uses convolutional layers for the encoder network and _convolutional transpose layers_ for the decoder. The model uses the mean squared error (MSE) between the input and its reconstruction as the loss function.

In [0]:
from tensorflow.keras.layers import (Input, Conv2D, LeakyReLU, Flatten, Dense,
                                     Reshape, Conv2DTranspose, Activation)
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import numpy as np
from tensorflow.keras.optimizers import Adam

class Autoencoder():
  """Convolutional autoencoder built with the Keras functional API.

  The encoder is a stack of `Conv2D` + `LeakyReLU` layers followed by a
  `Dense` projection to a `z_dim`-dimensional latent vector. The decoder
  mirrors it: a `Dense` layer, a `Reshape` back to the encoder's last feature
  map shape, then a stack of `Conv2DTranspose` layers ending in a sigmoid.

  Attributes:
    encoder: Keras `Model` mapping an input to its latent vector.
    decoder: Keras `Model` mapping a latent vector to a reconstruction.
    model: Keras `Model` chaining the encoder and the decoder end to end.
    compiled: Whether `compile` has already configured `model`.
  """

  def __init__(self, input_dim, encoder_conv_filters, encoder_conv_kernel_size,
               encoder_conv_strides, z_dim, decoder_conv_filters,
               decoder_conv_kernel_size, decoder_conv_strides):
    """Builds the encoder, the decoder and the joined model.

    The three `encoder_conv_*` sequences are read in parallel, one entry per
    encoder layer, with the layer count taken from `encoder_conv_filters`.
    The `decoder_conv_*` sequences work the same way for the decoder.

    Args:
      input_dim: Shape of a single input sample (without the batch axis).
      encoder_conv_filters: Number of filters for each encoder convolution.
      encoder_conv_kernel_size: Kernel size for each encoder convolution.
      encoder_conv_strides: Stride for each encoder convolution.
      z_dim: Size of the latent vector produced by the encoder.
      decoder_conv_filters: Number of filters for each transposed convolution.
      decoder_conv_kernel_size: Kernel size for each transposed convolution.
      decoder_conv_strides: Stride for each transposed convolution.
    """
    # --- Encoder ---
    encoder_input = Input(shape=input_dim, name='encoder_input')
    x = encoder_input
    for i, filters in enumerate(encoder_conv_filters):
      x = Conv2D(filters=filters,
                 kernel_size=encoder_conv_kernel_size[i],
                 strides=encoder_conv_strides[i],
                 padding='same', name='encoder_conv_{}'.format(i))(x)
      x = LeakyReLU()(x)

    # Remember the feature map shape (batch axis dropped) so that the decoder
    # can undo the flattening.
    shape_before_flattening = K.int_shape(x)[1:]
    x = Flatten()(x)
    encoder_output = Dense(z_dim)(x)
    self.encoder = Model(encoder_input, encoder_output)

    # --- Decoder ---
    decoder_input = Input(shape=(z_dim,), name='decoder_input')
    # np.prod returns a NumPy scalar; hand Dense a plain Python int instead.
    x = Dense(int(np.prod(shape_before_flattening)))(decoder_input)
    x = Reshape(shape_before_flattening)(x)
    last_layer = len(decoder_conv_filters) - 1
    for i, filters in enumerate(decoder_conv_filters):
      x = Conv2DTranspose(filters=filters,
                          kernel_size=decoder_conv_kernel_size[i],
                          strides=decoder_conv_strides[i],
                          padding='same', name='decoder_conv_{}'.format(i))(x)
      if i < last_layer:
        x = LeakyReLU()(x)
      else:
        # The final layer is squashed into (0, 1) by a sigmoid.
        x = Activation('sigmoid')(x)
    decoder_output = x
    self.decoder = Model(decoder_input, decoder_output)

    # Joining the models.
    self.model = Model(encoder_input, self.decoder(encoder_output))

    self.compiled = False

  @staticmethod
  def _per_sample_mse(y_act, y_pred):
    """Returns the squared error averaged over axes 1, 2 and 3 per sample."""
    return K.mean(K.square(y_act - y_pred), axis=(1, 2, 3))

  def compile(self, learning_rate):
    """Compiles the joined model with Adam and a per-sample MSE loss.

    Calling this again after a successful compile is a no-op.

    Args:
      learning_rate: Learning rate passed to the Adam optimizer.
    """
    if self.compiled:
      return
    # `lr` is a deprecated alias that newer Keras releases no longer accept.
    opt = Adam(learning_rate=learning_rate)
    self.model.compile(optimizer=opt, loss=self._per_sample_mse)
    self.compiled = True

  def fit(self, X, y, batch_size, epochs):
    """Trains the joined model, shuffling the data each epoch.

    Args:
      X: Training inputs.
      y: Training targets (the inputs themselves for a plain autoencoder).
      batch_size: Number of samples per gradient update.
      epochs: Number of passes over the training data.

    Returns:
      Whatever `self.model.fit` returns (for Keras, the training `History`).
    """
    return self.model.fit(X, y, batch_size=batch_size, epochs=epochs,
                          shuffle=True)

In [0]:
# Fetch the MNIST digits and split them into train/test images and labels.

from tensorflow.keras.datasets import mnist

train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [0]:
# Append a trailing channel axis so each array becomes a 4D tensor, then
# rescale the pixel values to [0, 1].

X_train = np.expand_dims(X_train, axis=-1) / 255.0
X_test = np.expand_dims(X_test, axis=-1) / 255.0

In [32]:
# Hyperparameters: four stride-(1, 2, 2, 1) convolutions on each side of a
# two-dimensional latent space.
autoencoder_config = {
    'input_dim': X_train.shape[1:],
    'encoder_conv_filters': (32, 64, 64, 64),
    'encoder_conv_kernel_size': (3, 3, 3, 3),
    'encoder_conv_strides': (1, 2, 2, 1),
    'z_dim': 2,
    'decoder_conv_filters': (64, 64, 32, 1),
    'decoder_conv_kernel_size': (3, 3, 3, 3),
    'decoder_conv_strides': (1, 2, 2, 1),
}
autoencoder = Autoencoder(**autoencoder_config)
autoencoder.model.summary()

Model: "model_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 28, 28, 1)]       0         
_________________________________________________________________
encoder_conv_0 (Conv2D)      (None, 28, 28, 32)        320       
_________________________________________________________________
leaky_re_lu_21 (LeakyReLU)   (None, 28, 28, 32)        0         
_________________________________________________________________
encoder_conv_1 (Conv2D)      (None, 14, 14, 64)        18496     
_________________________________________________________________
leaky_re_lu_22 (LeakyReLU)   (None, 14, 14, 64)        0         
_________________________________________________________________
encoder_conv_2 (Conv2D)      (None, 7, 7, 64)          36928     
_________________________________________________________________
leaky_re_lu_23 (LeakyReLU)   (None, 7, 7, 64)          0  

In [0]:
# TODO set up checkpoints in Drive and train for 200 epochs.

# Reconstruction task: the training images serve as both inputs and targets.
autoencoder.compile(learning_rate=0.0005)
autoencoder.fit(X=X_train, y=X_train, batch_size=32, epochs=10)

### Analysis of the Autoencoder

TODO