# Variational Encoder Theory Basics

Variational ecoder(VAE) has been widely adopted. In the application level, it can be slow to train because of the noise it introduces to the encoded layer. In this notebook, we try to train a VAE with a convolution network. 

The strategy is to first train an convolutional auto-encoder first and then free the encoder and implement the VAE structure to map the feature layer to a variational latent space. 

# Application Highlights

In [6]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.examples.tutorials.mnist import input_data    
FLAGS = None
mnist = input_data.read_data_sets('./', one_hot=False)

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz


In [89]:
#Clearing all
tf.reset_default_graph()
#Opening new directory for tensorboard
from datetime import datetime
now = datetime.now()
logdir = 'C:/Users/CK/Documents/tf_logs' + now.strftime("%Y%m%d-%H%M%S") + "/"
#To monitor training through tensorboard
print('TensorBoard CMD:tensorboard --logdir='+logdir)

TensorBoard CMD:tensorboard --logdir=C:/Users/CK/Documents/tf_logs20181011-181705/


In [90]:
learning_rate = 0.01
epochs = 100
batch_size = 800
inputs_ = tf.placeholder(tf.float32, (batch_size, 28, 28, 1), name='inputs')

# CNN

In [91]:
def featConstructor(inputs,varScope,batch_normalised=True,activation=tf.nn.sigmoid):
    
    with tf.variable_scope(varScope):
        norm1=tf.layers.batch_normalization(inputs_,training=batch_normalised,axis=-1)
        conv1 = tf.layers.separable_conv2d(
                norm1,
                filters=32,
                kernel_size=(3,3),
                strides=(1, 1),
                activation=activation
        )
        maxpool1 = tf.layers.max_pooling2d(conv1, pool_size=(2,2), strides=(2,2), padding='same')
        norm2=tf.layers.batch_normalization(maxpool1,training=batch_normalised,axis=-1)
        conv2 = tf.layers.separable_conv2d(
                inputs=norm2,
                filters=16,
                kernel_size=(3,3),
                strides=(1, 1),
                activation=activation
        )
        maxpool2 = tf.layers.max_pooling2d(conv2, pool_size=(2,2), strides=(2,2), padding='same')
        norm3=tf.layers.batch_normalization(maxpool2,training=batch_normalised,axis=-1)
        conv3= tf.layers.separable_conv2d(
                inputs=norm3,
                filters=8,
                kernel_size=(3,3),
                strides=(1, 1),
                activation=activation
        )
        return tf.layers.max_pooling2d(conv3, pool_size=(2,2), strides=(2,2), padding='same',name='encoded') # Now 2x2x8

def imgConstructor(features,varScope,batch_normalised=True, activation=tf.nn.relu):
    with tf.variable_scope(varScope):
        norm1=tf.layers.batch_normalization(
            features,
            training=batch_normalised,
            axis=-1
        )
        upsample1 = tf.image.resize_images(norm1, size=(7,7), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        conv4 = tf.layers.separable_conv2d(inputs=upsample1,
                filters=16,
                kernel_size=(3,3),
                strides=(1, 1),
                padding='same',
                data_format='channels_last',
                dilation_rate=(1, 1),
                depth_multiplier=2,
                activation=activation)
        norm2=tf.layers.batch_normalization(
            conv4,
            training=batch_normalised,
            axis=-1
        )
        upsample2 = tf.image.resize_images(norm2, size=(14,14), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        conv5 = tf.layers.separable_conv2d(inputs=upsample2,
                filters=32,
                kernel_size=(3,3),
                strides=(1, 1),
                padding='same',
                data_format='channels_last',
                dilation_rate=(1, 1),
                depth_multiplier=2,
                activation=activation)
        norm3=tf.layers.batch_normalization(
            conv5,
            training=batch_normalised,
            axis=-1
        )
        upsample3 = tf.image.resize_images(norm3, size=(28,28), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        conv6 = tf.layers.separable_conv2d(inputs=upsample3, 
                filters=64,
                kernel_size=(3,3),
                strides=(1, 1),
                padding='same',
                data_format='channels_last',
                dilation_rate=(1, 1),
                depth_multiplier=2,
                activation=activation)
        norm4=tf.layers.batch_normalization(
            conv6,
            training=batch_normalised,
            axis=-1
        )
        logits = tf.layers.separable_conv2d(inputs=norm4, filters=1, kernel_size=(3,3), padding='same', activation=None)
        # Pass logits through sigmoid to get reconstructed image
        return logits,tf.nn.sigmoid(logits)
def encoder(inputs,featConstructor,imgConstructor,varScope):
    with tf.variable_scope(varScope):
        features=featConstructor(inputs,'features')
        images_logits,images=imgConstructor(features,'images')
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=inputs, logits=images_logits))
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            adam = tf.train.AdamOptimizer(learning_rate)
            # Return the encoded code for embedding visualisation, the optimiser for executing training session.
            return features,adam.minimize(loss,var_list= tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,scope=varScope))

In [92]:
feats,opt_feats=encoder(inputs_,featConstructor,imgConstructor,'CNN_encoder')

# VAE

### Standard VAE

In [93]:
# A multi-layer coder 
def coder(inputs,layer_units,varScope,batch_normalised=True,activation=tf.nn.relu):
    with tf.variable_scope(varScope):
        layers=[inputs]
        for i in range(0,len(layer_units)):
            print(layers[-1])
            layers=layers+[tf.layers.batch_normalization(layers[-1],training=batch_normalised,axis=-1)]
            layers=layers+[tf.layers.dense(layers[-1],units=layer_units[i],activation=tf.nn.relu)]
    return tf.layers.dense(layers[-1],units=layer_units[-1],activation=activation)
# A sampler to sample variations from the latent space
def sampler(mean,sd):
    epsilon_=tf.random_normal(shape=[i.value for i in mean.get_shape()])
    #return mean + tf.exp(0.5 * logVar) * epsilon_
    sample=mean + sd*epsilon_
    return sample
# A variational ecoder using coder input
def vae(inputs,coder,encoder_units,decoder_units,varScope):
    original_shape=inputs.get_shape()
    inputs_flattened=tf.reshape(inputs,shape=(original_shape[0].value,-1))
    with tf.variable_scope(varScope):
        encoded_mean=coder(inputs_flattened,encoder_units,'encoded_mean')
        encoded_sd=coder(inputs_flattened,encoder_units,'encoded_sd')
        encoded=sampler(encoded_mean,encoded_sd)
        logits=coder(encoded,decoder_units,'decoded',activation=None)
        decoded=tf.nn.sigmoid(logits)
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=inputs_flattened, logits=logits))
        kl_loss = -0.5*tf.reduce_mean(1 + encoded_sd - tf.square(encoded_mean) - tf.exp(encoded_sd))
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        adam = tf.train.AdamOptimizer(learning_rate)
    
    # Return the encoded code for embedding visualisation, the optimiser for executing training session.
    return encoded,adam.minimize(loss+0.1*kl_loss,var_list= tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,scope=varScope))
    
    

In [94]:
sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess, "../CNN/CNN_encoder.ckpt")

INFO:tensorflow:Restoring parameters from ../CNN/CNN_encoder.ckpt


In [95]:
#Training model

vae_inputs=tf.reshape(feats,shape=(batch_size,-1))
encoded,opt=vae(vae_inputs,coder,[16,8],[16,32],'vae')

merged_summary=tf.summary.merge_all()
print([merged_summary,opt,encoded])
modelname="CNN_feats_VAE"
writer = tf.summary.FileWriter(logdir, sess.graph)
for e in range(epochs):
    for ii in range(mnist.train.num_examples//batch_size):
        
        batch_xs,batch_ys = mnist.train.next_batch(batch_size)
        imgs = batch_xs.reshape((-1, 28, 28, 1)) 
        
        summary, _, code = sess.run([merged_summary,opt,encoded], feed_dict={inputs_: imgs})
        
        print("Running epochs {}, batch {}".format(e+1,ii+1))
        writer.add_summary(summary, ii+batch_size*e)

    # Add ops to save and restore all the variables.
    saver.save(sess, "./"+modelname+".ckpt")

Tensor("Reshape_1:0", shape=(800, 32), dtype=float32)
Tensor("vae/encoded_mean/dense/Relu:0", shape=(800, 16), dtype=float32)
Tensor("Reshape_1:0", shape=(800, 32), dtype=float32)
Tensor("vae/encoded_sd/dense/Relu:0", shape=(800, 16), dtype=float32)
Tensor("vae/add:0", shape=(800, 8), dtype=float32)
Tensor("vae/decoded/dense/Relu:0", shape=(800, 16), dtype=float32)
[None, <tf.Operation 'Adam' type=NoOp>, <tf.Tensor 'vae/add:0' shape=(800, 8) dtype=float32>]


TypeError: Fetch argument None has invalid type <class 'NoneType'>

## Graph


In [None]:
#To show the tensorBoard
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
show_graph(tf.get_default_graph().as_graph_def())

# Embeddings(code) visualisation

In [184]:
def create_sprite_image(images):
    """Returns a sprite image consisting of images passed as argument. Images should be count x width x height"""
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    
    
    spriteimage = np.ones((img_h * n_plots ,img_w * n_plots ))
    
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                spriteimage[i * img_h:(i + 1) * img_h,
                  j * img_w:(j + 1) * img_w] = this_img
    
    return spriteimage

def vector_to_matrix_mnist(mnist_digits):
    """Reshapes normal mnist digit (batch,28*28) to matrix (batch,28,28)"""
    return np.reshape(mnist_digits,(-1,28,28))

def invert_grayscale(mnist_digits):
    """ Makes black white, and white black """
    return 1-mnist_digits

In [185]:
to_visualise = batch_xs
to_visualise = vector_to_matrix_mnist(to_visualise)
to_visualise = invert_grayscale(to_visualise)

sprite_image = create_sprite_image(to_visualise)
plt.imsave(os.path.join(logdir,'mnistdigits.png'),sprite_image,cmap='gray')

In [186]:
with open(os.path.join(logdir,'metadata.tsv'),'w') as f:
    f.write("Index\tLabel\n")
    for index,label in enumerate(batch_ys):
        f.write("%d\t%d\n" % (index,label))

In [187]:
# Embeddings logging for Tensorboard
from tensorflow.contrib.tensorboard.plugins import projector
config = projector.ProjectorConfig()
embedding_var=tf.Variable(code,dtype=tf.float32,name='embeddings')
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
sess.run(embedding_var.initializer)
summary_writer = tf.summary.FileWriter(logdir)
# Specify where you find the metadata
embedding.metadata_path = os.path.join(logdir,'metadata.tsv') #'metadata.tsv'

# Specify where you find the sprite (we will create this later)
embedding.sprite.image_path = os.path.join(logdir,'mnistdigits.png') #'mnistdigits.png'
embedding.sprite.single_image_dim.extend([28,28])

# Say that you want to visualise the embeddings
projector.visualize_embeddings(summary_writer, config)
saver = tf.train.Saver([embedding_var])
saver.save(sess, os.path.join(logdir, 'embeddings.ckpt'))

'C:/Users/CK/Documents/tf_logs20181011-131207/embeddings.ckpt'