# VAE

In [2]:
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from PIL import Image
from matplotlib import pyplot as plt



# Seed TensorFlow's and NumPy's global random generators with the same fixed
# value before any data pipeline or model is created.
tf.random.set_seed(22)
np.random.seed(22)



def save_images(imgs, name):
    """Tile 28x28 grayscale images into a 280x280 contact sheet and save it.

    At most the first 100 images are used (a 10x10 grid). The outer loop
    advances the x offset, so consecutive images fill one column from top
    to bottom before moving one column to the right. If fewer than 100
    images are supplied, the remaining cells are left black.

    Args:
        imgs: Sequence (or array) of 2-D uint8 arrays, each 28x28.
        name: Output file path; the image format is chosen by PIL from the
            file extension. Missing parent directories are created.
    """
    # Create the target directory up front so the save cannot fail on a
    # missing folder after the caller has already done expensive work.
    out_dir = os.path.dirname(name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    new_im = Image.new('L', (280, 280))

    # (x, y) paste offsets in the same order the original nested loops used.
    offsets = ((i, j) for i in range(0, 280, 28) for j in range(0, 280, 28))

    # zip stops at the shorter input: extra images are ignored and a short
    # batch no longer raises IndexError.
    for offset, im in zip(offsets, imgs):
        new_im.paste(Image.fromarray(im, mode='L'), offset)

    new_im.save(name)


# Hyperparameters.
h_dim, batchsz, lr = 20, 512, 1e-3


# Fashion-MNIST images, scaled from [0, 255] to float32 in [0, 1].
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train = x_train.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.

# The datasets are built from images only; the labels are used solely for
# the shape printout below.
train_db = (
    tf.data.Dataset.from_tensor_slices(x_train)
    .shuffle(batchsz * 5)
    .batch(batchsz)
)
test_db = tf.data.Dataset.from_tensor_slices(x_test).batch(batchsz)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

# Dimensionality of the latent code.
z_dim = 10

class VAE(keras.Model):
    """Fully-connected variational autoencoder for flattened 784-value inputs.

    The encoder maps an input to the mean and log-variance of a diagonal
    Gaussian over a `z_dim`-dimensional latent code; the decoder maps a
    latent code back to 784 unnormalised logits (apply a sigmoid to obtain
    pixel intensities).
    """

    def __init__(self):
        super(VAE, self).__init__()

        # Encoder: one shared hidden layer feeding two parallel heads.
        self.fc1 = layers.Dense(128)
        self.fc2 = layers.Dense(z_dim)  # mean head
        self.fc3 = layers.Dense(z_dim)  # log-variance head

        # Decoder
        self.fc4 = layers.Dense(128)
        self.fc5 = layers.Dense(784)

    def encoder(self, x):
        """Return `(mu, log_var)` of the approximate posterior for `x`."""
        h = tf.nn.relu(self.fc1(x))
        mu = self.fc2(h)
        # The head predicts log(sigma^2) rather than sigma^2 so the output
        # is unconstrained; reparameterize() exponentiates it.
        log_var = self.fc3(h)

        return mu, log_var

    def decoder(self, z):
        """Map latent codes `z` to reconstruction logits (no sigmoid)."""
        out = tf.nn.relu(self.fc4(z))
        out = self.fc5(out)

        return out

    def reparameterize(self, mu, log_var):
        """Sample `z = mu + sigma * eps` with `eps ~ N(0, I)`.

        Sampling through `eps` keeps `z` differentiable with respect to
        `mu` and `log_var`.
        """
        # Use the dynamic shape: the static `log_var.shape` contains None
        # when the batch size is unknown (e.g. inside tf.function), and
        # tf.random.normal cannot sample a partially-known shape.
        eps = tf.random.normal(tf.shape(log_var), dtype=log_var.dtype)

        # sigma = exp(0.5 * log(sigma^2))
        std = tf.exp(log_var * 0.5)

        z = mu + std * eps
        return z

    def call(self, inputs, training=None):
        """Encode, sample and decode.

        Args:
            inputs: Tensor of shape [b, 784].
            training: Accepted for Keras API compatibility; not used.

        Returns:
            Tuple `(x_hat, mu, log_var)` where `x_hat` holds the
            reconstruction logits [b, 784] and `mu` / `log_var` are
            [b, z_dim].
        """
        # [b, 784] => [b, z_dim], [b, z_dim]
        mu, log_var = self.encoder(inputs)
        # reparameterization trick
        z = self.reparameterize(mu, log_var)

        x_hat = self.decoder(z)

        return x_hat, mu, log_var


model = VAE()
# Build the weights with a concrete dummy batch of 4 flattened images.
model.build(input_shape=(4, 784))
optimizer = tf.optimizers.Adam(lr)

# The per-epoch image dumps are written here; create the directory before
# training so the first save cannot fail on a missing folder.
os.makedirs('vae_images', exist_ok=True)


def _logits_to_images(logits):
    """Convert [b, 784] logits to a uint8 array of shape [b, 28, 28]."""
    probs = tf.sigmoid(logits)
    pixels = tf.reshape(probs, [-1, 28, 28]).numpy() * 255.
    return pixels.astype(np.uint8)


# test_db is not shuffled, so its first batch is the same every epoch;
# fetch and flatten it once instead of building a new iterator per epoch.
test_batch = tf.reshape(next(iter(test_db)), [-1, 784])

for epoch in range(1000):

    for step, x in enumerate(train_db):

        # [b, 28, 28] => [b, 784]
        x = tf.reshape(x, [-1, 784])

        with tf.GradientTape() as tape:
            x_rec_logits, mu, log_var = model(x)

            # Per-pixel Bernoulli cross-entropy, summed over pixels and
            # averaged over the batch.
            rec_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=x_rec_logits)
            rec_loss = tf.reduce_sum(rec_loss) / x.shape[0]

            # Closed-form KL( N(mu, sigma^2) || N(0, 1) ), summed over the
            # latent dimensions and averaged over the batch.
            # https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians
            kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))
            kl_div = tf.reduce_sum(kl_div) / x.shape[0]

            # The factor 1. is the weight of the KL term.
            loss = rec_loss + 1. * kl_div

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, 'kl div:', float(kl_div), 'rec loss:', float(rec_loss))

    # Evaluation 1: decode latent codes drawn from the N(0, I) prior.
    z = tf.random.normal((batchsz, z_dim))
    x_hat = _logits_to_images(model.decoder(z))
    save_images(x_hat, 'vae_images/sampled_epoch%d.png'%epoch)

    # Evaluation 2: reconstruct the fixed test batch.
    x_hat_logits, _, _ = model(test_batch)
    x_hat = _logits_to_images(x_hat_logits)
    save_images(x_hat, 'vae_images/rec_epoch%d.png'%epoch)



(60000, 28, 28) (60000,)
(10000, 28, 28) (10000,)
0 0 kl div: 1.9412329196929932 rec loss: 546.2901000976562
0 100 kl div: 15.535074234008789 rec loss: 288.2947082519531
1 0 kl div: 16.25847625732422 rec loss: 272.8807678222656
1 100 kl div: 15.295757293701172 rec loss: 259.06195068359375
2 0 kl div: 15.529515266418457 rec loss: 250.00306701660156
2 100 kl div: 14.341240882873535 rec loss: 254.28314208984375
3 0 kl div: 14.867894172668457 rec loss: 250.24952697753906
3 100 kl div: 14.843954086303711 rec loss: 248.39674377441406
4 0 kl div: 14.137299537658691 rec loss: 245.79287719726562
4 100 kl div: 14.691381454467773 rec loss: 247.0005340576172
5 0 kl div: 14.585232734680176 rec loss: 237.10342407226562
5 100 kl div: 14.727499008178711 rec loss: 240.71823120117188
6 0 kl div: 14.548667907714844 rec loss: 238.0881805419922
6 100 kl div: 14.725262641906738 rec loss: 242.03463745117188
7 0 kl div: 14.938092231750488 rec loss: 235.62298583984375
7 100 kl div: 14.582120895385742 rec loss:

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\user\anaconda3\envs\tf2\lib\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-43ca5474e5b1>", line 123, in <module>
    grads = tape.gradient(loss, model.trainable_variables)
  File "C:\Users\user\anaconda3\envs\tf2\lib\site-packages\tensorflow\python\eager\backprop.py", line 1073, in gradient
    unconnected_gradients=unconnected_gradients)
  File "C:\Users\user\anaconda3\envs\tf2\lib\site-packages\tensorflow\python\eager\imperative_grad.py", line 77, in imperative_grad
    compat.as_str(unconnected_gradients.value))
  File "C:\Users\user\anaconda3\envs\tf2\lib\site-packages\tensorflow\python\eager\backprop.py", line 162, in _gradient_function
    return grad_fn(mock_op, *out_grads)
  File "C:\Users\user\anaconda3\envs\tf2\lib\site-packages\tensorflow\python\ops\math_grad.py", line 1692, in _MatMulGrad
    grad_b = gen_math_ops.mat_mu

TypeError: object of type 'NoneType' has no len()