<a href="https://colab.research.google.com/github/Rubeennn/ACA-Homeworks/blob/main/VAE_Homework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as L
import matplotlib.pyplot as plt
%matplotlib inline

In [12]:
# The following code fetches you two datasets: images, usable for autoencoder training and attributes.
# Those attributes will be required for the final part of the assignment (applying smiles), so please keep them in mind
from lfw_dataset import fetch_lfw_dataset

data, attrs = fetch_lfw_dataset()

images not found, donwloading...
extracting...
done
attributes not found, downloading...
done


In [13]:
X_train = data[:10000].reshape((10000, -1))
print(X_train.shape)
X_val = data[10000:].reshape((-1, X_train.shape[1]))
print(X_val.shape)

image_h = data.shape[1]
image_w = data.shape[2]

(10000, 6075)
(3143, 6075)


In [14]:
X_train = np.float32(X_train)
X_train = X_train/255
X_val = np.float32(X_val)
X_val = X_val/255

In [15]:
def plot_gallery(images, h, w, n_row=3, n_col=6):
    """Helper function to plot a gallery of portraits"""
    plt.figure(figsize=(1.5 * n_col, 1.7 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w, 3)), cmap=plt.cm.gray, vmin=-1, vmax=1, interpolation='nearest')
        plt.xticks(())
        plt.yticks(())

In [16]:
class DisplayCallback(tf.keras.callbacks.Callback):
    def __init__(self, model, rate):
        super(DisplayCallback, self).__init__()
        self.model = model
        self.rate = rate

    def on_epoch_end(self, epoch, logs=None):
        model = self.model
        if epoch % self.rate == 0:
#             print(model.weights[1], model.weights[-1])
            print(model.weights[1][0])
            idx = np.random.choice(X_train.shape[0])
            plt.subplot(221)
            plt.imshow(X_train[idx].reshape(
                (image_h, image_w, 3)
            ))
            plt.subplot(222)
            plt.imshow(tf.reshape(
                model(X_train[tf.newaxis, idx]), (image_h, image_w, 3)
            ))
            idx = np.random.choice(X_val.shape[0])
            plt.subplot(223)
            plt.imshow(X_val[idx].reshape(
                (image_h, image_w, 3)
            ))
            plt.subplot(224)
            plt.imshow(tf.reshape(
                model(X_val[tf.newaxis, idx]), (image_h, image_w, 3)
            ))
            plt.show()

In [17]:
from tensorflow import keras

In [7]:
class Sample(keras.layers.Layer):
    def __init__(self):
        super(Sample, self).__init__()

    def call(self, inputs):
        mean, std = inputs

        epsilon = tf.random.normal(shape=(256,))
        sample = tf.multiply(0.5 * tf.exp(std), epsilon)
        z = tf.add(mean, sample)

        return z, mean, std

inputs = tf.keras.Input(shape=(6075,))
e1 = L.Dense(units=1024, activation='relu', kernel_initializer='glorot_uniform')(inputs)
e2 = L.Dense(units=512, activation='relu', kernel_initializer='glorot_uniform')(e1)

mean = L.Dense(units=256, kernel_initializer='glorot_uniform', name='mean')(e2)
std = L.Dense(units=256, activation='relu', kernel_initializer='glorot_uniform', name='std')(e2)
sample = Sample()
z = sample([mean, std])
lattent = L.Dense(units=128, activation='relu', kernel_initializer='glorot_uniform')(z[0])
d1 = L.Dense(units=256, activation='relu', kernel_initializer='glorot_uniform')(lattent)
d2 = L.Dense(units=512, activation='relu', kernel_initializer='glorot_uniform')(d1)
d3 = L.Dense(units=1024, activation='relu', kernel_initializer='glorot_uniform')(d2)

encoded = L.Dense(units=6075, activation='sigmoid')(d3)

encoder = tf.keras.Model(inputs, z)
decoder = tf.keras.Model(z[0], encoded)


In [None]:
encoder.summary(), decoder.summary()

In [18]:
class VAE(tf.keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def KL_divergence(self, mu, sigma):
      return tf.reduce_sum(tf.multiply(-0.5, (1 + tf.math.log(sigma) - tf.square(mu) - tf.square(sigma))))

    @tf.function
    def log_likelihood(self, x, z):
      return tf.reduce_sum((1 / x.shape[1]) * (x - z)**2)

    def train_step(self, data):

      with tf.GradientTape() as tape:

        encoder_out = self.encoder(data)
        decoder_out = self.decoder(encoder_out[0])

        loss = self.KL_divergence(encoder_out[1], encoder_out[2]) + self.log_likelihood(data, decoder_out)

      gradients = tape.gradient(loss, self.trainable_variables)

      self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

      return loss


    def call(self, inputs):
        mean, std = self.encoder(inputs)

        epsilon = tf.random.normal(shape=(256,))
        sample = tf.multiply(0.5 * tf.exp(std), epsilon)
        z = tf.add(mean, sample)

        rec_x = self.decoder(z)

        return rec_x

In [8]:
# inputs = tf.keras.Input(shape=(6075,))
# e1 = L.Dense(units=1024, activation='relu', kernel_initializer='glorot_uniform')(inputs)
# e2 = L.Dense(units=512, activation='relu', kernel_initializer='glorot_uniform')(e1)

# mean = L.Dense(units=256, kernel_initializer='glorot_uniform', name='mean')(e2)
# std = L.Dense(units=256, activation='relu', kernel_initializer='glorot_uniform', name='std')(e2)
# sample = Sample()
# z = sample([mean, std])
# lattent = L.Dense(units=128, activation='relu', kernel_initializer='glorot_uniform')(z[0])
# d1 = L.Dense(units=256, activation='relu', kernel_initializer='glorot_uniform')(lattent)
# d2 = L.Dense(units=512, activation='relu', kernel_initializer='glorot_uniform')(d1)
# d3 = L.Dense(units=1024, activation='relu', kernel_initializer='glorot_uniform')(d2)

# encoded = L.Dense(units=6075, activation='sigmoid')(d3)

# encoder = tf.keras.Model(inputs, z)
# decoder = tf.keras.Model(z[0], encoded)


In [9]:
# class VAE(tf.keras.Model):
#     def __init__(self, encoder, decoder, **kwargs):
#         super(VAE, self).__init__(**kwargs)
#         self.encoder = encoder
#         self.decoder = decoder
#         self.optimizer = tf.keras.optimizers.Adam()

#     @tf.function
#     def KL_divergence(self, mu, sigma):
#       return tf.reduce_sum(tf.multiply(-0.5, (1 + tf.math.log(sigma) - tf.square(mu) - tf.square(sigma))))

#     @tf.function
#     def log_likelihood(self, x, z):
#       return tf.reduce_sum((1 / x.shape[1]) * (x - z)**2)

#     def train_step(self, data):

#       with tf.GradientTape() as tape:

#         reconstructed_x = self.call(data)
#         mean, std = self.encoder(data)
#         kl_loss = self.KL_divergence(mean, std)
#         loss = self.KL_divergence(mean, std) + self.log_likelihood(data, decoder_out)

#       gradients = tape.gradient(loss, self.trainable_variables)

#       self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

#       return loss


#     def call(self, inputs):
#         mean, std = self.encoder(inputs)

#         epsilon = tf.random.normal(shape=(256,))
#         sample = tf.multiply(0.5 * tf.exp(std), epsilon)
#         z = tf.add(mean, sample)

#         rec_x = self.decoder(z)

#         return rec_x

In [19]:
a = VAE(encoder, decoder)

In [79]:
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)


In [20]:
for i in range(10):
  loss = a.train_step(X_train)
  print(loss)



tf.Tensor(inf, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(nan, shape=(), dtype=float32)


In [21]:
callback = DisplayCallback(a, 2)
a.compile(optimizer=tf.keras.optimizers.Adam())


In [22]:
history = a.fit(

                X_train,
                batch_size=64,
                callbacks=[callback],
                validation_data=(X_val, X_val),
                epochs=1)


AttributeError: ignored

In [55]:
a = tf.constant([5,6,5])
a * 0.5

TypeError: ignored

And the last, but not least! Place in the code where the most of the formulaes goes to - optimization objective. The objective for VAE has it's own name - variational lowerbound. And as for any lowerbound our intention is to maximize it. Here it is (for one sample $z$ per input $x$):

$$\mathcal{L} = -D_{KL}(q_{\phi}(z|x)||p_{\theta}(z)) + \log p_{\theta}(x|z)$$

Your next task is to implement two functions that compute KL-divergence and the second term - log-likelihood of an output. Here is some necessary math for your convenience:

$$D_{KL} = -\frac{1}{2}\sum_{i=1}^{dimZ}(1+log(\sigma_i^2)-\mu_i^2-\sigma_i^2)$$
$$\log p_{\theta}(x|z) = \sum_{i=1}^{dimX}\log p_{\theta}(x_i|z)=\sum_{i=1}^{dimX} \log \Big( \frac{1}{\sigma_i\sqrt{2\pi}}e^{-\frac{(\mu_I-x)^2}{2\sigma_i^2}} \Big)=...$$