<a href="https://colab.research.google.com/github/AanchalA/WeekendProjects/blob/main/MNIST_with_VariationalAutoEncoders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

AutoEncoders to Variational AutoEncoders

---
- Modify Encoder graph: Replace bottleneck with gaussian distribution

- Update the loss to: α (reconstruction loss weight) * MSE (reconstruction loss) + KL Divergence



In [None]:
import os
import time
import pickle
import numpy as np

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import (
    Input, Conv2D, ReLU, BatchNormalization, Flatten,
    Dense, Reshape, Conv2DTranspose, Activation, Lambda)

tf.compat.v1.disable_eager_execution()

In [None]:
def calculate_reconstruction_loss(y_target, y_predicted):
    """
    Return the mean squared error between target and prediction.

    The squared difference is averaged over axes 1, 2 and 3, so one loss
    value is produced per entry along axis 0.
    """
    squared_error = K.square(y_target - y_predicted)
    return K.mean(squared_error, axis=[1, 2, 3])


def calculate_kl_loss(model):
    """
    Build a KL-divergence loss function bound to `model`.

    The returned callable reads `model.mean_vector` and `model.log_variance`
    and ignores whatever positional arguments it is given, so it can be used
    both as a Keras metric (called with targets and predictions) and directly
    inside a combined loss (called with no arguments).
    """
    def _calculate_kl_loss(*args):
        log_var = model.log_variance
        mean = model.mean_vector
        # Per-dimension term of the KL divergence, summed over axis 1 below.
        per_dimension = 1 + log_var - K.square(mean) - K.exp(log_var)
        return -0.5 * K.sum(per_dimension, axis=1)

    return _calculate_kl_loss

In [None]:
class VariationalAutoEncoder:
    """
    Deep convolutional Variational Autoencoder with mirrored encoder and
    decoder components.

    The encoder maps an image to the mean and log-variance of a Gaussian in
    latent space and samples a point from it; the decoder maps a latent point
    back to an image. Training minimises
    ``reconstruction_loss_weight * reconstruction_loss + kl_loss``.
    """

    def __init__(self,
                 input_shape,
                 conv_filters,
                 conv_kernels,
                 conv_strides,
                 latent_space_dim):
        """
        Store the architecture description and build encoder, decoder and model.

        Args:
            input_shape: Shape of one input sample, e.g. (28, 28, 1).
            conv_filters: Number of filters for each conv layer, e.g. (2, 4, 8).
            conv_kernels: Kernel size for each conv layer, e.g. (3, 5, 3).
            conv_strides: Stride for each conv layer, e.g. (1, 2, 2).
            latent_space_dim: Number of units in the bottleneck, e.g. 2.
        """
        self.input_shape = input_shape
        self.conv_filters = conv_filters
        self.conv_kernels = conv_kernels
        self.conv_strides = conv_strides
        self.latent_space_dim = latent_space_dim

        # Weight (alpha) applied to the reconstruction term of the combined loss.
        self.reconstruction_loss_weight = 1000

        self.model = None
        self.encoder = None
        self.decoder = None
        # Set by `_add_bottleneck`; read by the KL loss.
        self.mean_vector = None
        self.log_variance = None

        self._model_input = None
        self._shape_before_bottleneck = None
        self._num_conv_layers = len(conv_filters)
        self._build()

    def _build(self):
        """
        Build the encoder, the decoder and the combined model, in that order.
        """
        self._build_encoder()
        self._build_decoder()
        self._build_autoencoder()

    # ------------------------------------------------------------------ encoder

    def _build_encoder(self):
        """
        Assemble the encoder: input -> conv blocks -> sampled latent point.
        """
        encoder_input = self._add_encoder_input()
        self._model_input = encoder_input
        conv_layers = self._add_conv_layers(encoder_input)
        bottleneck = self._add_bottleneck(conv_layers)  # Latent space: encoder output
        self.encoder = Model(encoder_input, bottleneck, name='encoder')

    def _add_encoder_input(self):
        """
        Create the encoder's input layer from `self.input_shape`.
        """
        return Input(shape=self.input_shape, name='encoder_input')

    def _add_conv_layers(self, encoder_input):
        """
        Create all convolutional blocks in the encoder.
        """
        x = encoder_input
        for layer_index in range(self._num_conv_layers):
            x = self._add_conv_layer(layer_index, x)
        return x

    def _add_conv_layer(self, layer_index, x):
        """
        Add one convolutional block (Conv2D, ReLU, BatchNormalization) to the
        graph of layers `x` and return the extended graph.
        """
        layer_number = layer_index + 1
        conv_layer = Conv2D(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding='same',
            name=f'encoder_conv_layer_{layer_number}'
        )
        x = conv_layer(x)
        x = ReLU(name=f'encoder_relu_{layer_number}')(x)
        x = BatchNormalization(name=f'encoder_bn_{layer_number}')(x)
        return x

    def _add_bottleneck(self, x):
        """
        Flatten the data and add the bottleneck with Gaussian sampling.

        Two Dense layers produce the mean and log-variance vectors (stored on
        `self` so the KL loss can read them); a Lambda layer then samples a
        latent point from the corresponding normal distribution.
        """
        # K.int_shape(x) is [batch size, ...]; drop the batch dimension so the
        # decoder can later reshape back to this shape.
        self._shape_before_bottleneck = K.int_shape(x)[1:]
        x = Flatten()(x)
        self.mean_vector = Dense(units=self.latent_space_dim,
                                 name='encoder_mean_vector')(x)
        self.log_variance = Dense(units=self.latent_space_dim,
                                  name='encoder_log_variance_vector')(x)

        def sample_point_from_normal_distribution(args):
            mean_vector, log_variance = args
            # Size the noise from the tensor actually passed to the layer, not
            # from the attribute captured at build time.
            epsilon = K.random_normal(shape=K.shape(mean_vector), mean=0., stddev=1.)
            # exp(log_variance / 2) is the standard deviation.
            sampled_point = mean_vector + K.exp(log_variance / 2) * epsilon
            return sampled_point

        x = Lambda(sample_point_from_normal_distribution,
                   name='encoder_output')([self.mean_vector, self.log_variance])
        return x

    # ------------------------------------------------------------------ decoder

    def _build_decoder(self):
        """
        Assemble the decoder: latent input -> dense -> reshape ->
        transposed-conv blocks -> sigmoid output.
        """
        decoder_input = self._add_decoder_input()
        dense_layer = self._add_dense_layer(decoder_input)
        reshape_layer = self._add_reshape_layer(dense_layer)
        conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
        decoder_output = self._add_decoder_output(conv_transpose_layers)
        self.decoder = Model(decoder_input, decoder_output, name='decoder')

    def _add_decoder_input(self):
        """
        Create the decoder's input layer: one vector of `latent_space_dim` units.
        """
        # Pass the shape as a tuple rather than a bare int.
        return Input(shape=(self.latent_space_dim,), name='decoder_input')

    def _add_dense_layer(self, decoder_input):
        """
        Add a Dense layer with as many units as the encoder had before flattening.
        """
        # Only the flattened size is needed here, so take the product of the
        # stored shape instead of adding a Flatten layer.
        num_neurons = np.prod(self._shape_before_bottleneck)
        dense_layer = Dense(num_neurons, name='decoder_dense')(decoder_input)
        return dense_layer

    def _add_reshape_layer(self, dense_layer):
        """
        Reshape the dense output back to the encoder's pre-bottleneck shape.
        """
        return Reshape(target_shape=self._shape_before_bottleneck)(dense_layer)

    def _add_conv_transpose_layers(self, x):
        """
        Add all convolutional transpose blocks in the decoder.

        Walks the conv layer indices in reverse order and stops before index 0;
        the parameters of layer 0 are used by `_add_decoder_output` instead.
        """
        for layer_index in reversed(range(1, self._num_conv_layers)):
            x = self._add_conv_transpose_layer(layer_index, x)
        return x

    def _add_conv_transpose_layer(self, layer_index, x):
        """
        Add one convolutional transpose block (Conv2DTranspose, ReLU,
        BatchNormalization) to the graph of layers `x` and return the
        extended graph.
        """
        layer_num = self._num_conv_layers - layer_index
        conv_transpose_layer = Conv2DTranspose(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding='same',
            name=f'decoder_conv_transpose_layer_{layer_num}'
        )
        x = conv_transpose_layer(x)
        x = ReLU(name=f'decoder_relu_{layer_num}')(x)
        x = BatchNormalization(name=f'decoder_bn_{layer_num}')(x)
        return x

    def _add_decoder_output(self, x):
        """
        Add the final transposed convolution and sigmoid activation.
        """
        conv_transpose_layer = Conv2DTranspose(
            # Match the channel count of the input instead of hard-coding 1.
            # Assumes `input_shape` is channels-last -- TODO confirm if a
            # different Keras image data format is ever configured.
            filters=self.input_shape[-1],
            kernel_size=self.conv_kernels[0],
            strides=self.conv_strides[0],
            padding='same',
            name=f'decoder_conv_transpose_layer_{self._num_conv_layers}'
        )
        x = conv_transpose_layer(x)
        output_layer = Activation(activation='sigmoid', name='sigmoid_layer')(x)
        return output_layer

    # ------------------------------------------------------------- full model

    def _build_autoencoder(self):
        """
        Chain encoder and decoder into a single end-to-end model.
        """
        model_input = self._model_input
        model_output = self.decoder(self.encoder(model_input))
        self.model = Model(model_input, model_output, name='autoencoder')

    def _calculate_combined_loss(self, y_target, y_predicted):
        """
        Return ``reconstruction_loss_weight * reconstruction_loss + kl_loss``.
        """
        reconstruction_loss = calculate_reconstruction_loss(y_target, y_predicted)
        kl_loss = calculate_kl_loss(self)()
        # The weight must scale the reconstruction term; adding it would only
        # shift the loss by a constant and leave the two terms unbalanced.
        combined_loss = self.reconstruction_loss_weight * reconstruction_loss + kl_loss
        return combined_loss

    def summary(self):
        """
        Print the summaries of the encoder, the decoder and the combined model.
        """
        self.encoder.summary()
        self.decoder.summary()
        self.model.summary()

    def compile(self, learning_rate=0.0001):
        """
        Compile the model with a legacy Adam optimizer, the combined loss, and
        the reconstruction and KL losses as separate metrics.
        """
        optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate)
        self.model.compile(optimizer=optimizer,
                           loss=self._calculate_combined_loss,
                           metrics=[calculate_reconstruction_loss,
                                    calculate_kl_loss(self)])

    def train(self, x_train, batch_size, num_epochs):
        """
        Fit the model to reproduce `x_train`, logging to TensorBoard under
        ``logs/auto-encoder-<unix timestamp>``.
        """
        run_name = f'auto-encoder-{int(time.time())}'
        tensorboard = TensorBoard(log_dir=f'logs/{run_name}')
        # The input doubles as the target: the model learns to reconstruct it.
        self.model.fit(x_train,
                       x_train,
                       shuffle=True,
                       epochs=num_epochs,
                       batch_size=batch_size,
                       callbacks=[tensorboard]
                       )

    # ------------------------------------------------------------ persistence

    def save(self, save_folder='.'):
        """
        Write the constructor parameters and the model weights to `save_folder`,
        creating the folder if needed.
        """
        self._create_folder_if_it_doesnt_exist(save_folder)
        self._save_parameters(save_folder)
        self._save_weights(save_folder)

    def _create_folder_if_it_doesnt_exist(self, folder):
        """
        Create `folder` (and any missing parents); do nothing if it already exists.
        """
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(folder, exist_ok=True)

    def _save_parameters(self, save_folder):
        """
        Pickle the constructor arguments, in constructor order, to
        ``<save_folder>/parameters.pkl``.
        """
        parameters = [
            self.input_shape,
            self.conv_filters,
            self.conv_kernels,
            self.conv_strides,
            self.latent_space_dim
        ]
        save_path = os.path.join(save_folder, 'parameters.pkl')
        with open(save_path, 'wb') as f:
            pickle.dump(parameters, f)

    def _save_weights(self, save_folder):
        """
        Save the combined model's weights to ``<save_folder>/weights.h5``.
        """
        save_path = os.path.join(save_folder, 'weights.h5')
        self.model.save_weights(save_path)

    @classmethod
    def load(cls, save_folder='.'):
        """
        Rebuild an autoencoder from the files written by `save`.

        Returns:
            A new instance constructed from ``parameters.pkl`` with the
            weights from ``weights.h5`` loaded.
        """
        parameters_path = os.path.join(save_folder, 'parameters.pkl')
        # NOTE: pickle can execute arbitrary code while loading; only load
        # folders that come from a trusted source.
        with open(parameters_path, 'rb') as f:
            parameters = pickle.load(f)

        # Use `cls` so subclasses load as themselves.
        autoencoder = cls(*parameters)
        weights_path = os.path.join(save_folder, 'weights.h5')
        autoencoder.load_weights(weights_path)
        return autoencoder

    def load_weights(self, weights_path):
        """
        Load weights for the combined model from `weights_path`.
        """
        self.model.load_weights(weights_path)

    def reconstruct(self, images):
        """
        Encode `images` and decode them again.

        Returns:
            A tuple ``(reconstructed_images, latent_representation)``, both
            produced by `predict`.
        """
        latent_representation = self.encoder.predict(images)
        # Use predict() rather than calling the decoder directly: calling it
        # yields a symbolic tensor that cannot be iterated or plotted when
        # eager execution is disabled.
        reconstructed_images = self.decoder.predict(latent_representation)
        return reconstructed_images, latent_representation

In [None]:
# autoencoder = VariationalAutoEncoder(
#     input_shape=(28, 28, 1),
#     conv_filters=(32, 64, 64, 64),
#     conv_kernels=(3, 3, 3, 3),
#     conv_strides=(1, 2, 2, 1),
#     latent_space_dim=2
# )
# autoencoder.summary()

In [None]:
# from variational_autoencoder import VariationalAutoEncoder
from tensorflow.keras.datasets import mnist

# Training hyperparameters passed to `train` further down in this cell.
EPOCHS = 50  # number of passes over the training data
BATCH_SIZE = 32
LEARNING_RATE = 0.0005


def load_mnist():
    """
    Load MNIST and return ``(x_train, y_train, x_test, y_test)``.

    Images are converted to float32, divided by 255 and given one extra
    trailing axis; labels are returned as loaded.
    """
    def _prepare(images):
        scaled = images.astype('float32') / 255
        # Append a trailing axis of size 1 to every image.
        return scaled.reshape(scaled.shape + (1, ))

    (train_images, y_train), (test_images, y_test) = mnist.load_data()
    x_train = _prepare(train_images)
    x_test = _prepare(test_images)
    return x_train, y_train, x_test, y_test


def train(x_train, learning_rate, batch_size, epochs):
    """
    Build, compile and train a VariationalAutoEncoder on `x_train`.

    The architecture is fixed: 28x28x1 inputs, four conv layers and a
    2-dimensional latent space.

    Args:
        x_train: Training images, used as both input and target.
        learning_rate: Learning rate handed to the optimizer.
        batch_size: Number of samples per gradient update.
        epochs: Number of passes over `x_train`.

    Returns:
        The trained VariationalAutoEncoder instance.
    """
    autoencoder = VariationalAutoEncoder(
        input_shape=(28, 28, 1),
        conv_filters=(32, 64, 64, 64),
        conv_kernels=(3, 3, 3, 3),
        conv_strides=(1, 2, 2, 1),
        latent_space_dim=2
    )
    # No trailing comma here: it would wrap the call in a discarded 1-tuple.
    autoencoder.compile(learning_rate)
    autoencoder.train(x_train, batch_size, epochs)
    return autoencoder

# Load MNIST and keep only the training images; labels and the test split
# are discarded here.
x_train, _, _, _ = load_mnist()
# Train on the first 10,000 training images only, then write the parameters
# and weights to the 'model' folder.
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save('model')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Train on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Rebuild the autoencoder from the parameters and weights stored in the
# 'model' folder, then print the encoder, decoder and combined model summaries.
loaded_autoencoder = VariationalAutoEncoder.load('model')
loaded_autoencoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 encoder_conv_layer_1 (Conv2D)  (None, 28, 28, 32)   320         ['encoder_input[0][0]']          
                                                                                                  
 encoder_relu_1 (ReLU)          (None, 28, 28, 32)   0           ['encoder_conv_layer_1[0][0]']   
                                                                                                  
 encoder_bn_1 (BatchNormalizati  (None, 28, 28, 32)  128         ['encoder_relu_1[0][0]']         
 on)                                                                                        

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# from train import load_mnist
# from variational_autoencoder import VariationalAutoEncoder

def select_images(images, labels, num_images=10):
    """
    Draw `num_images` random samples and return ``(images, labels)`` for them.

    Indices are drawn with `np.random.choice` using its default settings, and
    the same indices are applied to both arrays so each image keeps its label.
    """
    candidate_indices = np.arange(len(images))
    chosen = np.random.choice(candidate_indices, num_images)
    return images[chosen], labels[chosen]


def plot_reconstructed_images(images, reconstructed_images):
    """
    Show originals (top row) above their reconstructions (bottom row).

    Images are paired positionally; each pair shares a column in a two-row
    grid with axes hidden and the reversed grayscale colormap.
    """
    figure = plt.figure(figsize=(15, 3))
    columns = len(images)
    pairs = zip(images, reconstructed_images)
    for position, (original, reconstruction) in enumerate(pairs, start=1):
        flat_original = original.squeeze()
        top_axis = figure.add_subplot(2, columns, position)
        top_axis.axis("off")
        top_axis.imshow(flat_original, cmap="gray_r")

        # np.squeeze accepts inputs that have no .squeeze() method of their own.
        flat_reconstruction = np.squeeze(reconstruction)
        bottom_axis = figure.add_subplot(2, columns, position + columns)
        bottom_axis.axis("off")
        bottom_axis.imshow(flat_reconstruction, cmap="gray_r")
    plt.show()


def plot_images_encoded_in_latent_space(latent_representations, sample_labels):
    """
    Scatter-plot the first two latent coordinates, coloured by label.

    Column 0 of `latent_representations` goes on the x axis and column 1 on
    the y axis; a colorbar shows the label-to-colour mapping.
    """
    plt.figure(figsize=(10, 10))
    first_coordinate = latent_representations[:, 0]
    second_coordinate = latent_representations[:, 1]
    scatter_options = {
        "cmap": "rainbow",
        "c": sample_labels,
        "alpha": 0.5,
        "s": 2,
    }
    plt.scatter(first_coordinate, second_coordinate, **scatter_options)
    plt.colorbar()
    plt.show()


# Reload the trained autoencoder from the "model" folder and fetch MNIST again.
autoencoder = VariationalAutoEncoder.load("model")
x_train, y_train, x_test, y_test = load_mnist()

# Reconstruct a handful of random test images and show them next to the originals.
num_sample_images_to_show = 8
sample_images, _ = select_images(x_test, y_test, num_sample_images_to_show)
reconstructed_images, _ = autoencoder.reconstruct(sample_images)
plot_reconstructed_images(sample_images, reconstructed_images)

# Encode a larger random sample of test images and plot where they fall in
# latent space, coloured by their labels.
num_images = 6000
sample_images, sample_labels = select_images(x_test, y_test, num_images)
_, latent_representations = autoencoder.reconstruct(sample_images)
plot_images_encoded_in_latent_space(latent_representations, sample_labels)

  updates=self.state_updates,


OperatorNotAllowedInGraphError: ignored

<Figure size 1080x216 with 0 Axes>