# Chapter 7: Building a Variational Autoencoder

Generative models are among the most promising approaches to giving computers an understanding of the world. They are truly unsupervised models, and they can perform tasks that many today consider to be at the cutting edge of Artificial Intelligence (AI). Generative models differ from other models for precisely the reason the name suggests: they generate data. Applied mostly to computer vision tasks, this class of network has the power to create new faces, new handwriting, or even paintings.

In [None]:
import numpy as np
import tensorflow as tf

## Building a Variational AutoEncoder

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
## Load MNIST from (or into) the local 'MNIST_data' directory with one-hot labels
mnist = input_data.read_data_sets(train_dir='MNIST_data', one_hot=True)

#### Build the encoder

In [None]:
def encoder(x, initializer):
    """Build the VAE encoder and sample a latent code.

    Args:
        x: Input tensor fed to the first dense layer.
        initializer: Initializer used for the kernels and biases of the
            three ELU layers. The `mu` / log-variance heads keep the
            `tf.layers.dense` defaults.

    Returns:
        A tuple `(z, kl_div)` where `z` is the sampled latent code (10 units)
        and `kl_div` is the KL divergence term averaged over the batch.
    """
    input_layer = tf.layers.dense(inputs=x, units=784, activation=tf.nn.elu,
                                  kernel_initializer=initializer,
                                  bias_initializer=initializer,
                                  name='input_layer')

    hidden_1 = tf.layers.dense(inputs=input_layer, units=256, activation=tf.nn.elu,
                               kernel_initializer=initializer,
                               bias_initializer=initializer)

    hidden_2 = tf.layers.dense(inputs=hidden_1, units=128, activation=tf.nn.elu,
                               kernel_initializer=initializer,
                               bias_initializer=initializer)

    ## Calculate mu and sigma for the z distribution.
    ## `sigma` is used as a log-variance below: the standard deviation
    ## is sqrt(exp(sigma)), and exp(sigma) appears in the KL term.
    mu = tf.layers.dense(inputs=hidden_2, units=10, activation=None)
    sigma = tf.layers.dense(inputs=hidden_2, units=10, activation=None)

    ## Calculate z: mu plus standard normal noise scaled by the standard deviation
    epsilon = tf.random_normal(shape=tf.shape(sigma), mean=0, stddev=1, dtype=tf.float32)
    z = mu + tf.sqrt(tf.exp(sigma)) * epsilon

    ## Calculate the KL divergence between N(mu, exp(sigma)) and N(0, 1):
    ## sum over the latent axis, then average over the batch.
    kl_div = -0.5 * tf.reduce_sum(1 + sigma - tf.square(mu) - tf.exp(sigma), axis=1)
    kl_div = tf.reduce_mean(kl_div)

    return z, kl_div

#### Build the decoder

In [None]:
def decoder(z, initializer):
    """Build the VAE decoder and its reconstruction loss.

    Uses `tf.layers.dense`, matching the encoder; the keyword names
    `kernel_initializer` / `bias_initializer` belong to that API.

    Note: this function reads the module-level names `x` (the input the
    reconstruction is compared against) and `input_dim` (the output width),
    so both must be defined before it is called.

    Args:
        z: Latent code tensor to decode.
        initializer: Initializer used for every layer's kernel and bias.

    Returns:
        A tuple `(dec_out, rec_loss)` where `dec_out` is the sigmoid output
        of width `input_dim` and `rec_loss` is the binary cross-entropy
        against `x`, summed over axis 1 and averaged over the batch.
    """
    layer_1 = tf.layers.dense(inputs=z, units=256, activation=tf.nn.elu,
                              kernel_initializer=initializer,
                              bias_initializer=initializer,
                              name='dec_l1')
    layer_2 = tf.layers.dense(inputs=layer_1, units=384, activation=tf.nn.elu,
                              kernel_initializer=initializer,
                              bias_initializer=initializer,
                              name='dec_l2')
    layer_3 = tf.layers.dense(inputs=layer_2, units=512, activation=tf.nn.elu,
                              kernel_initializer=initializer,
                              bias_initializer=initializer,
                              name='dec_l3')
    dec_out = tf.layers.dense(inputs=layer_3, units=input_dim, activation=tf.sigmoid,
                              kernel_initializer=initializer,
                              bias_initializer=initializer,
                              name='dec_l4')

    ## Calculate the reconstruction loss.
    ## epsilon keeps both log arguments strictly positive when dec_out hits 0 or 1.
    epsilon = 1e-10
    rec_loss = -tf.reduce_sum(
        x * tf.log(epsilon + dec_out) + (1 - x) * tf.log(epsilon + 1 - dec_out),
        axis=1)
    rec_loss = tf.reduce_mean(rec_loss)

    return dec_out, rec_loss

#### Initialize the training parameters

In [None]:
## Optimisation settings
learning_rate = 0.0001   # step size handed to the optimizer
epochs = 100             # number of passes of the training loop
batch_size = 100         # examples requested per training step

## Data dimensions
input_dim = 784          # width of the input placeholder
num_sample = 55000       # examples per epoch; presumably the size of mnist.train -- TODO confirm

## Size of the latent code
n_z = 10

#### Initialize the Encoder Input

In [None]:
## Placeholder for a batch of inputs: any batch size, `input_dim` features per row
x = tf.placeholder(dtype='float', shape=(None, input_dim), name='x')

#### Initialize the model, loss, and optimizer

In [None]:
## Create the weight initializer first: both networks need it at build time
initializer = tf.contrib.layers.xavier_initializer()

## Initialize the models: the encoder maps x to a latent sample z,
## and the decoder reconstructs from z (not from x)
z, kl_div = encoder(x, initializer)
dec_out, rec_loss = decoder(z, initializer)

## Calculate the overall model loss term
loss = tf.reduce_mean(rec_loss + kl_div)

## Create the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#### Run the model training

In [None]:
## The session is deliberately left open (no `with` block): the trained
## variables live in the session, and they are needed after training to
## decode new samples. Call sess.close() once you are completely done.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

steps_per_epoch = num_sample // batch_size

for epoch in range(epochs):
    for _ in range(steps_per_epoch):

        ## next_batch returns (images, labels); only the images are used
        batch_x, _labels = mnist.train.next_batch(batch_size)

        _, total_loss, batch_rec_loss, batch_kl_div = sess.run(
            [optimizer, loss, rec_loss, kl_div], feed_dict={x: batch_x})

    ## Report the losses of the last batch every fifth epoch
    if epoch % 5 == 0:
        print('[Epoch {}] Total Loss: {}, Reconstruction Loss: {}, Latent Loss: {}'.format(
            epoch, total_loss, batch_rec_loss, batch_kl_div))

#### Generate New Samples
Code from @shaohua0116

In [None]:
import matplotlib.pyplot as plt

## NOTE: `sess` must still be open and hold the trained variables here.
## A session created with `with tf.Session() as sess:` is closed when that
## block exits, and running the decoder through it would then fail.

## Draw latent codes from a standard normal and decode them.
## The samples get their own name so the graph tensor `z` stays usable
## as the feed_dict key.
z_sample = np.random.normal(size=[batch_size, n_z])
x_generated = x_hat = sess.run(dec_out, feed_dict={z: z_sample})

## Tile the first n*n decoded rows into an n x n grid of h x w images
h = w = 28
n = int(np.sqrt(batch_size))
I_generated = np.empty((h * n, w * n))
for i in range(n):
    for j in range(n):
        I_generated[i*h:(i+1)*h, j*w:(j+1)*w] = x_generated[i*n+j, :].reshape(h, w)

plt.figure(figsize=(8, 8))
plt.imshow(I_generated, cmap='gray')