# VAE TO IMITATE MNIST
https://github.com/FelixMohr/Deep-learning-with-Python/blob/master/VAE.ipynb

import data

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Forward slashes resolve on both Windows and POSIX. The previous '..\data\MNIST_data'
# literal depended on unrecognised escape sequences (\d, \M) and only worked on Windows.
mnist = input_data.read_data_sets('../data/MNIST_data')

Extracting ..\data\MNIST_data/train-images-idx3-ubyte.gz
Extracting ..\data\MNIST_data/train-labels-idx1-ubyte.gz
Extracting ..\data\MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ..\data\MNIST_data/t10k-labels-idx1-ubyte.gz


# VAE 1.0

## placeholders

In [20]:
# Start from an empty default graph before declaring the model inputs.
tf.reset_default_graph()

# --- network inputs ---------------------------------------------------------
# Image batch handed to the encoder: [batch, 28, 28, 1].
X_ph = tf.placeholder(tf.float32, [None, 28, 28, 1], name = "conv_in")

# Target images [batch, 28, 28] and a flattened [batch, 784] view of them.
Y_ph = tf.placeholder(tf.float32, [None, 28, 28], name = 'Y')
Y_flat_ph = tf.reshape(Y_ph, [-1, 28*28])

# Scalar keep probability forwarded to the dropout layers.
keep_prob_ph = tf.placeholder(tf.float32, (), name = 'keep_prob')

# --- hyper-parameters -------------------------------------------------------
dec_in_channels = 1                               # channels of the decoder's 7x7 seed map
n_latent = 8                                      # size of the latent code
reshaped_dim = [-1, 7, 7, dec_in_channels]        # shape the decoder reshapes its dense output to
inputs_decoder = int(49 * dec_in_channels / 2)    # width of the decoder's first dense layer

def lrelu(x, alpha = 0.03):
    """Leaky ReLU: element-wise maximum of `x` and `x * alpha`.

    Args:
        x: input tensor.
        alpha: factor `x` is multiplied by to form the second operand of the
            maximum (default 0.03).

    Returns:
        The result of `tf.maximum(x, tf.multiply(x, alpha))`.
    """
    scaled = tf.multiply(x, alpha)
    return tf.maximum(x, scaled)

## encoder
Maps an input image to the mean and log standard deviation of the latent code, then samples z from them.

In [4]:
def encoder_backup(X_in, keep_prob):
    """Earlier variant of the encoder: three conv + dropout stages, then a sampled latent code.

    Fixes relative to the previous revision:
      * `padding` is the string 'same' (it was the undefined name `same`);
      * dropout uses `tf.nn.dropout` (`tf.dropout` does not exist);
      * each convolution consumes the previous stage's output instead of the
        raw input `X`, so the stages are actually chained.

    Args:
        X_in: tensor that can be reshaped to [-1, 28, 28, 1].
        keep_prob: keep probability passed to every dropout layer.

    Returns:
        Tuple `(z, mn, sd)`:
          z  - sampled latent code, `mn + epsilon * exp(sd)`;
          mn - output of the dense "mean" layer, `n_latent` units;
          sd - 0.5 * output of a second dense layer, `n_latent` units
               (it is exponentiated before scaling the noise).
    """
    activation = lrelu
    with tf.variable_scope("encoder",reuse = None):
        x = tf.reshape(X_in, shape = [-1, 28, 28, 1])
        # Same stride schedule as `encoder`: two stride-2 stages, then one stride-1 stage.
        for strides in (2, 2, 1):
            x = tf.layers.conv2d(x,
                                 filters = 64,
                                 kernel_size = 4,
                                 strides = strides,
                                 padding = 'same',
                                 activation = activation
                                )
            x = tf.nn.dropout(x, keep_prob)
        x = tf.contrib.layers.flatten(x)
        mn = tf.layers.dense(x, units = n_latent)
        # One noise vector per batch row; the batch size is read at run time.
        epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_latent]))
        sd = .5*tf.layers.dense(x, units = n_latent)
        z = mn+tf.multiply(epsilon,tf.exp(sd))

        return z,mn,sd


In [10]:
# Rebuild from an empty default graph before defining and tracing the encoder.
# NOTE(review): if the notebook is run top to bottom, this also discards the graph that holds
# X_ph / Y_ph / keep_prob_ph from the placeholders cell — confirm the intended cell order.
tf.reset_default_graph()
def encoder(in_sample,keep_prob = 1,tracking = False):
    """Convolutional encoder: image batch -> sampled latent code.

    Args:
        in_sample: with `tracking=False`, the tensor to encode (reshaped to
            [-1, 28, 28, 1]). With `tracking=True`, a concrete array that is fed
            to an internal [None, 28, 28, 1] placeholder.
        keep_prob: keep probability passed to every dropout layer (default 1).
        tracking: debug switch. When True the function builds the graph, opens
            a temporary session, initialises the variables and returns an
            evaluated array instead of graph tensors.

    Returns:
        `tracking=False`: tuple `(z, mn, sd)` of tensors, where
        `z = mn + epsilon * exp(sd)`, `mn` is the dense "mean" layer and
        `sd` is 0.5 * a second dense layer (both `n_latent` units).
        `tracking=True`: the value of `epsilon` as returned by `sess.run`.
        NOTE(review): tracking mode evaluates `epsilon`, not `z` — confirm that
        this is the tensor meant to be inspected.
    """
    if tracking:
        X_in = tf.placeholder(
            dtype = tf.float32,
            shape = [None, 28, 28, 1],
            name = "conv_in"
        )
    else:
        X_in = in_sample
    activation = lrelu
    # Shape comments below are the author's annotations for a batch of 3.
    with tf.variable_scope("encoder",reuse = None):
        X = tf.reshape(X_in, shape = [-1, 28, 28, 1])  # [3,28,28,1]
        conv1 = tf.layers.conv2d(X,                    # [3,14,14,64]
                             filters = 64,
                             kernel_size = 4,
                             strides = 2,
                             padding = 'same',
                             activation = activation
                            )
        dropout1 = tf.nn.dropout(conv1, keep_prob)
        conv2 = tf.layers.conv2d(dropout1,             # [3,7,7,64]
                     filters = 64,
                     kernel_size = 4,
                     strides = 2,
                     padding = 'same',
                     activation = activation
                    )
        dropout2 = tf.nn.dropout(conv2, keep_prob)
        conv3 = tf.layers.conv2d(dropout2,             # [3,7,7,64]
                     filters = 64,
                     kernel_size = 4,
                     strides = 1,
                     padding = 'same',
                     activation = activation
                    )
        dropout3 = tf.nn.dropout(conv3, keep_prob)
        flatten = tf.contrib.layers.flatten(dropout3)  # [3,3136]
        mn = tf.layers.dense(flatten, units = n_latent)                        # [3,8]
        # One noise vector per batch row; the batch size is read at run time.
        epsilon = tf.random_normal(tf.stack([tf.shape(flatten)[0], n_latent])) # [3,8]
        sd = .5*tf.layers.dense(flatten, units = n_latent)                     # [3,8]
        z = mn+tf.multiply(epsilon,tf.exp(sd))                                 # [3,8]
        if tracking:
            config = tf.ConfigProto()
            config.gpu_options.allow_growth=True
            # Context manager closes the session (and frees its resources) once the
            # value has been fetched; previously the session was never closed.
            with tf.Session(config=config) as sess:
                sess.run(tf.global_variables_initializer())
                return sess.run(epsilon, feed_dict =
                                {X_in: in_sample})
        return z,mn,sd

# Smoke test: run a batch of three all-ones images through the encoder in
# tracking mode and print the shape of the array it returns.
image_sample = np.ones((3, 28, 28, 1))
print(encoder(image_sample, keep_prob = 1, tracking = True).shape)


(3, 8)


## decoder

In [17]:
# Rebuild from an empty default graph before defining and tracing the decoder.
# NOTE(review): if the notebook is run top to bottom, this also discards the graph that holds
# X_ph / Y_ph / keep_prob_ph from the placeholders cell — confirm the intended cell order.
tf.reset_default_graph()
def decoder(sampled_z_in, keep_prob,tracking = False):
    """Decoder: latent code -> 28x28x1 image with sigmoid-activated pixels.

    Args:
        sampled_z_in: with `tracking=False`, the latent tensor to decode.
            With `tracking=True`, a concrete array fed to an internal
            [None, 8] placeholder.
        keep_prob: keep probability passed to every dropout layer.
        tracking: debug switch. When True the function builds the graph, opens
            a temporary session, initialises the variables and returns the
            evaluated image batch instead of a graph tensor.

    Returns:
        `tracking=False`: tensor `img` reshaped to [-1, 28, 28, 1].
        `tracking=True`: the evaluated value of `img` as returned by `sess.run`.
    """
    activation = lrelu
    if tracking:
        # Latent width is hard-coded to 8 here; it matches n_latent = 8 from the placeholders cell.
        sampled_z = tf.placeholder(
            dtype = tf.float32,
            shape = [None, 8],
            name = "conv_in"
        )
    else:
        sampled_z = sampled_z_in
    # Shape comments below are the author's annotations for a batch of 3.
    with tf.variable_scope("decoder", reuse = None):
        x = tf.layers.dense(sampled_z, units = inputs_decoder, activation = activation) # [3,24]
        x2 = tf.layers.dense(x, units = inputs_decoder*2+1, activation = activation)    # [3,49]
        unflatten = tf.reshape(x2, reshaped_dim)                                   # [3,7,7,1]
        deconv_3 = tf.layers.conv2d_transpose(unflatten,                           # [3,7,7,64]
                                              filters = 64,
                                              kernel_size = 4,
                                              strides = 1,
                                              padding = 'same'
                                             )
        dropout_3 = tf.nn.dropout(deconv_3, keep_prob)
        deconv_2 = tf.layers.conv2d_transpose(dropout_3,                           # [3,14,14,64]
                                              filters = 64,
                                              kernel_size = 4,
                                              strides = 2,
                                              padding = 'same'
                                             )
        dropout_2 = tf.nn.dropout(deconv_2, keep_prob)
        deconv_1 = tf.layers.conv2d_transpose(dropout_2,                           # [3,28,28,64]
                                              filters = 64,
                                              kernel_size = 4,
                                              strides = 2,
                                              padding = 'same'
                                             )
        dropout_1 = tf.nn.dropout(deconv_1, keep_prob)
        flatten = tf.contrib.layers.flatten(dropout_1)                             # [3,50176]
        decode_out = tf.layers.dense(flatten, units = 28*28, activation = tf.nn.sigmoid) # [3,784]
        img = tf.reshape(decode_out, shape = [-1,28,28,1])                         # [3,28,28,1]
        if tracking:
            config = tf.ConfigProto()
            config.gpu_options.allow_growth=True
            # Context manager closes the session (and frees its resources) once the
            # value has been fetched; previously the session was never closed.
            with tf.Session(config=config) as sess:
                sess.run(tf.global_variables_initializer())
                return sess.run(img, feed_dict =
                                {sampled_z: sampled_z_in})
        return img
# Smoke test: decode a batch of three all-ones latent vectors in tracking mode
# and print the shape of the array the decoder returns.
sample_z = np.ones((3, 8))
print(decoder(sample_z, keep_prob = 1, tracking = True).shape)

(3, 28, 28, 1)


## define & compute loss function
loss = combination of the KL divergence and the squared difference (reconstruction error)

In [26]:
#tf.reset_default_graph()
with tf.variable_scope("loss", reuse = True):
    sampled, mn, sd = encoder(X_ph, keep_prob_ph)
    dec = decoder(sampled, keep_prob_ph)
    
    unreshaped = tf.reshape(dec, [-1,28*28])
    img_loss = tf.reduce_sum = (tf.squared_difference(unreshaped, Y_flat_ph), 1)
    latent_loss = 