<a href="https://colab.research.google.com/github/OlatundeEso/Resume/blob/main/Variational_Autoencoders_12102023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras
from keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape
from keras.layers import BatchNormalization
from keras.models import Model
from keras.datasets import mnist
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
# Loading the MNIST Dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Normalize and reshape the data
# Normalize



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [2]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [3]:
# Reshaping
img_width = X_train.shape[1]
img_height = X_train.shape[2]
num_channels = 1 # MNIST ---> MNIST is Grayscale so the channel is 1
X_train = X_train.reshape(X_train.shape[0], img_width, img_height, num_channels)
X_test = X_test.reshape(X_test.shape[0], img_width, img_height, num_channels)
input_shape = (img_height, img_width, num_channels)

In [4]:
X_train.shape # Prior to reshaping was (60000, 28, 28)

(60000, 28, 28, 1)

In [5]:
# BUILD THE MODEL
# Build the Encoder part of the model first
# We can define 4 nos ConV2D, Flatten and then Dense

In [6]:
latent_dim = 2 # Number of latent dimension parameters
input_img = Input(shape=input_shape, name = 'encoder_input')
x = Conv2D(32, 3, padding = 'same', activation = 'relu')(input_img)
x = Conv2D(64, 3, padding = 'same', activation = 'relu', strides=(2,2))(x)
x = Conv2D(64, 3, padding = 'same', activation = 'relu')(x)
x = Conv2D(64, 3, padding = 'same', activation = 'relu')(x)

In [7]:
conv_shape = K.int_shape(x) # This is the shape of the Convolution layer to be provided to the decoder


In [8]:
# Now let us Flatter the output of the convolution layers

In [9]:
x = Flatten()(x)
x = Dense(32, activation = 'relu')(x)

In [10]:
# Now we need to create two layers, to provide 2 outputs, one for each
# Two outputs, for the latent mean and log variance (standard deviation)
# We would use these to sample random variables in the latent space to which inputs are mapped

In [11]:
z_mu = Dense(latent_dim, name = 'latent_mu')(x) # the mean values of encoded input vectors
z_sigma = Dense(latent_dim, name = 'latent_sigma')(x) # the std of the encoded inputs

In [12]:
#  REPARAMETERIZATION TRICK
# We need to define the sampling function that would sample from the distribution
# Reparameterize based on mu + sigma_squad x eps
# This is to ensure that the gradient descent can be carried out on the network



In [13]:
def sample_z(args):
  z_mu, z_sigma = args
  eps = K.random_normal(shape= (K.shape(z_mu)[0], K.int_shape(z_mu)[1]))
  return z_mu + K.exp(z_sigma/2) * eps

In [14]:
# Now let us sample from the Latent Distribution space
# z is the Lambda custom layer that we are adding for gradient descent calculations
# using mu and sigma

In [15]:
z = Lambda(sample_z, output_shape = (latent_dim, ), name = 'z')([z_mu, z_sigma])

In [16]:
# z (the lambda layer) will be the last layer in the encoder
# Now we should define and summarise the encoder model

In [17]:
encoder = Model(input_img, [z_mu, z_sigma, z], name = 'encoder')

In [18]:
print(encoder.summary())

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 encoder_input (InputLayer)  [(None, 28, 28, 1)]          0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 28, 28, 32)           320       ['encoder_input[0][0]']       
                                                                                                  
 conv2d_1 (Conv2D)           (None, 14, 14, 64)           18496     ['conv2d[0][0]']              
                                                                                                  
 conv2d_2 (Conv2D)           (None, 14, 14, 64)           36928     ['conv2d_1[0][0]']            
                                                                                            

In [19]:
# Now let us build the Decoder


In [20]:
# Recall that the Decoder takes in the Latent Distribution Vector as the Input

In [21]:
decoder_input = Input(shape= (latent_dim, ), name = 'decoder_input')

In [22]:
# Now, we have to ensure that the we start with a shape that can be remapped
# to the original image shape as we want our final output to be (same shape as the original input)
# So, we add a dense layer with dimensions that can be reshaped to the desired output shape

In [23]:
x = Dense(conv_shape[1]*conv_shape[2]*conv_shape[3], activation = 'relu')(decoder_input)

In [24]:
# Reshape to the shape of the last conv layer in the encoder, so that we can upscale
# (conv2D transpose) back to the original shape

In [25]:
x = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)

In [26]:
# Now we would use  Conv2D to reverse the conv layers defined in the encoder

In [27]:
x = Conv2DTranspose(32, 3, padding = 'same', activation = 'relu', strides = (2,2))(x)

In [28]:
# We can add more Conv2DTranspose layers as desired
# Now let us use Sigmoid activation

In [29]:
x = Conv2DTranspose(num_channels, 3, padding = 'same', activation = 'sigmoid', name = 'decoder')(x)

In [30]:
# Define and summarize the decoder model

In [31]:
decoder = Model(decoder_input, x, name = 'decoder-gangan')
decoder.summary()

Model: "decoder-gangan"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 decoder_input (InputLayer)  [(None, 2)]               0         
                                                                 
 dense_1 (Dense)             (None, 12544)             37632     
                                                                 
 reshape (Reshape)           (None, 14, 14, 64)        0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 28, 28, 32)        18464     
 anspose)                                                        
                                                                 
 decoder (Conv2DTranspose)   (None, 28, 28, 1)         289       
                                                                 
Total params: 56385 (220.25 KB)
Trainable params: 56385 (220.25 KB)
Non-trainable params: 0 (0.00 Byte)
______________

In [32]:
# Now we apply the decoder to the latent sample

In [33]:
z_decoded = decoder(z)

In [34]:
# Now let us define the custome loss
# Recall that the Variational Autoencoder is trained using 2 loss functions-
# Reconstruction Loss and KL divergence

In [35]:
# Let us add a class to define a custom layer with loss

In [36]:
class CustomLayer(keras.layers.Layer):
  def vae_loss(self, x, z_decoded):
    x = K.flatten(x)
    z_decoded = K.flatten(z_decoded)
    # Calculate Reconstruction Loss
    recon_loss = keras.metrics.binary_crossentropy(x, z_decoded)
    # KL Divergence (Check the axis parameter below)
    k1_loss = -5e-4 * K.mean(1 + z_sigma - K.square(z_mu) - K.exp(z_sigma), axis = -1)
    return K.mean(recon_loss + k1_loss)
  # Adding a Custom Loss to the Class
  def call(self, inputs):
    x = inputs[0]
    z_decoded = inputs[1]
    loss = self.vae_loss(x, z_decoded)
    self.add_loss(loss, inputs=inputs)
    return x

In [37]:
encoder = Model(input_img, [z_mu, z_sigma, z], name = 'encoder')

In [38]:
# Apply the custom loss to the input image and the decoded latent distribution

In [39]:
y = CustomLayer()([input_img, z_decoded])

In [40]:
# y is basically the original image after encoding img, sigma and z
# This will be used as the output of the Variational Autoencoder.

In [41]:
vae = Model(input_img, y, name ='vae')

In [42]:
# Now Compile the Model

In [43]:
vae.compile (optimizer = 'adam', loss = 'NONE')


In [44]:
vae.summary()

Model: "vae"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 encoder_input (InputLayer)  [(None, 28, 28, 1)]          0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 28, 28, 32)           320       ['encoder_input[0][0]']       
                                                                                                  
 conv2d_1 (Conv2D)           (None, 14, 14, 64)           18496     ['conv2d[0][0]']              
                                                                                                  
 conv2d_2 (Conv2D)           (None, 14, 14, 64)           36928     ['conv2d_1[0][0]']            
                                                                                                

In [45]:
# Train the Autoencoder

In [46]:
vae.fit(X_train, None, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10


ValueError: ignored

In [None]:
# Now, let us visualize the inputs mapped to the latent space
# Remember that we have encoded inputs into the Latent Space with dimension = 2
# Now we extract z_mu (the first parameter in the result of the encoder)

In [None]:
mu, _, _ = encoder.predict(x_test)
# Plot dim1 and dim2 for mu
plt.figure(figsize=(8,10))
plt.scatter(mu[:, 0], mu[:, 1], c = y_test, cmap = 'brg')
plt.xlabel('dim 1')
plt.ylabel('dim 2')
plt.colorbar()
plt.show()

In [None]:
# Now let us visualize the images
# Single decoded image with random input latent vector (of size 1 x2)
# The Latent space range is about -5 to 5, so we would pick random values within
# this range- we will start with -1, 1 and slowly go up to -1.5, 1.5 and see how
# the output morphs from one image to the other

In [None]:
sample_vector = np.array([[1,3]])
decoded_example = decoder.predict(sample_vector)
decoded_example_reshaped = decoded_example.reshape(img_width, img_height)
plt.imshow(decoded_example_reshaped)

In [None]:
# Let us try to automate this process of morphing by generating multiple images
# and plotting
# We will use the decoder to generate images by tweaking the latent variables from the latent space
# We would create a grid of defined size with zeros
# We would take sample from some defined linear space, in this example [-4, 4]
# We will feed it to the decoder and update zeros in the figure with output

In [None]:
n = 20 # Generate 15 x 15 digits
figure = np.zeros((img_width * n, img_height * n, num_channels))

In [None]:
# Now we are creating a grid of latent variables to be supplied as inputs to
# decoder.predict()
# We will also create vectors within the range -5 to 5 as that seems to be the range
# in the latent space

In [None]:
grid_x = np.linspace(-4, 4, n)
grid_y = np.linspace(-4, 4, n)[::-1]


In [None]:
# Decoder for each square in the grid

In [None]:
for i, yi in enumerate(grid_y):
  for j, xi in enumerate(grid_x):
    z_sample = np.array([[xi, yi]])
    x_decoded = decoder.predict(z_sample)
    digit = x_decoded[0].reshape(img_width, img_height, num_channels)
    figure[i * img_width: (i + 1) * img_width,
           j * img_height: (j + i) * img_height] = digit


In [None]:
plt.figure(figsize=(10,10))
# Reshape for Visualization
fig_shape = np.shape(figure)
figure = figure.reshape((fig_shape[0], fig_shape[1]))
plt.imshow(figure, cmap = 'gnuplot2')
plt.show()

In [None]:
X_train[20].shape

In [None]:
X_train[20][22:23]

In [None]:
X_train[20].shape

In [None]:
X_train[20][:,:,0]

In [None]:
X_train[20].shape

In [None]:
plt.figure(1)
plt.subplot(221)
plt.imshow(X_train[12][:,:,0])

In [None]:
X_train[20][22:23]

In [None]:
X_train[20][22:23][:, :, 0]

In [None]:
input_shape

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
X_train

In [None]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [None]:
len(X_train)

In [None]:
X_train.shape

In [None]:
X_train.dtype

In [None]:
X_train.ndim

In [None]:
first_one = X_train[0]
first_one

In [None]:
first_one[20]/255

In [None]:
first_one[26]

In [None]:
first_one[26][3]

In [None]:
first_one.shape

In [None]:
len(first_one)

In [None]:
first_one

In [None]:
X_train.size

In [None]:
X_train = X_train/255
X_test = X_test/255