In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from tensorflow.examples.tutorials.mnist import input_data

In [4]:
#Load MNIST through the (deprecated) tutorial helper; labels come back one-hot encoded.
#NOTE(review): the data directory is an absolute path at the filesystem root - confirm it is intended.
mnist = input_data.read_data_sets(
    train_dir="/mnist/data_rnn_model/",
    one_hot=True,
)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /mnist/data_rnn_model/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /mnist/data_rnn_model/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting /mnist/data_rnn_model/t10k-images-idx3-ubyte.gz
Extracting /mnist/data_rnn_model/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [5]:
#Model / data hyper-parameters

#Length of each vector in the input sequence
element_size = 28
#Number of vectors per sequence
time_steps = 28
#Number of output classes
num_of_classes = 10
#Examples per mini-batch
batch_size = 128
#Size of the hidden RNN state vector (128 is an arbitrary choice)
hidden_layer_size = 128

In [6]:
#Directory that receives the TensorBoard model summaries
LOG_DIR = "logs/RNN_with_summaries"

#Placeholders for the inputs and the labels.
#Each image is 28 by 28 pixels and is fed as a sequence:
#  - element_size is the dimension of each vector in the sequence
#  - time_steps is the number of such vectors in a sequence
#The leading None leaves the batch dimension unspecified.
_inputs = tf.placeholder(
    dtype=tf.float32,
    shape=[None, time_steps, element_size],
    name='inputs',
)
y = tf.placeholder(
    dtype=tf.float32,
    shape=[None, num_of_classes],
    name='labels',
)

In [7]:
#Draw one training batch and reshape it to (batch_size, time_steps, element_size).
#The training loop further down draws its own batches and overwrites these names.
batch_x, batch_y = mnist.train.next_batch(batch_size)
batch_x = batch_x.reshape(batch_size, time_steps, element_size)


In [8]:
#This helper function is adapted from the official TensorFlow documentation.
#It adds the ops that take care of logging summaries for a tensor.
def variable_summaries(var):
    """Attach TensorBoard summaries (mean, stddev, max, min, histogram) to `var`.

    Args:
        var: the tensor or variable to summarise.

    Returns:
        None. The summary ops are registered on the default graph and are
        picked up later by tf.summary.merge_all().
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean',mean)
        #Only the stddev *computation* lives in its own scope. The summary ops
        #below stay directly under 'summaries' so that max/min/histogram are
        #not filed under 'stddev' in TensorBoard.
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var-mean)))
        tf.summary.scalar('stddev',stddev)
        tf.summary.scalar('max',tf.reduce_max(var))
        tf.summary.scalar('min',tf.reduce_min(var))
        tf.summary.histogram('histogram',var)
#Trainable parameters of the RNN cell, each with its own TensorBoard summaries.
#All three start at zero.
with tf.name_scope('rnn_weights'):
    #Input-to-hidden weights: [element_size, hidden_layer_size]
    with tf.name_scope('W_x'):
        Wx = tf.Variable(tf.zeros([element_size, hidden_layer_size]))
        variable_summaries(Wx)

    #Hidden-to-hidden (recurrent) weights: [hidden_layer_size, hidden_layer_size]
    with tf.name_scope('W_h'):
        Wh = tf.Variable(tf.zeros([hidden_layer_size, hidden_layer_size]))
        variable_summaries(Wh)

    #Hidden-state bias: [hidden_layer_size]
    with tf.name_scope('Bias'):
        b_rnn = tf.Variable(tf.zeros([hidden_layer_size]))
        variable_summaries(b_rnn)

In [9]:
#Applying the RNN step with tf.scan()
#Implements the vanilla RNN step:
#h_t = tanh(x_t.Wx + h_{t-1}.Wh + b)
#tanh is the hyperbolic tangent fxn, whose values lie in (-1,1)
#x_t is the input vector and h_t the hidden state vector at time step t

def rnn_step(previous_hidden_state,x):
    """Compute one vanilla-RNN update.

    Args:
        previous_hidden_state: hidden state from the previous time step;
            it is multiplied by Wh, so its last dimension is hidden_layer_size.
        x: input for the current time step; it is multiplied by Wx, so its
            last dimension is element_size.

    Returns:
        The new hidden state tanh(previous_hidden_state.Wh + x.Wx + b_rnn).
    """
    current_hidden_state = tf.tanh(tf.matmul(previous_hidden_state,Wh) + tf.matmul(x,Wx) + b_rnn)
    return current_hidden_state

#Next we apply this function across all 28 time steps.
#tf.scan() repeatedly applies a callable fxn to a sequence of elements in order
#(it introduces loops to TensorFlow) and iterates over the FIRST axis of its input,
#so we first move the time axis to the front:
#[batch, time_steps, element_size] -> [time_steps, batch, element_size]
processed_input = tf.transpose(_inputs,perm=[1,0,2])
#Size the initial state from the batch that is actually fed rather than from the
#batch_size constant, so the graph also accepts batches of a different size
#(e.g. a larger evaluation set). Feeding batch_size examples behaves as before.
initial_hidden = tf.zeros(tf.stack([tf.shape(processed_input)[1],hidden_layer_size]))
#Result stacks the hidden state of every time step along the first axis
all_hidden_states = tf.scan(rnn_step,processed_input,initializer=initial_hidden,name='states')

In [10]:
#Weights for the output (linear) layer that maps a hidden state to class scores.
#The scope name was misspelled 'linear_layr_weights'; it now matches the
#'linear_layer_weights' spelling used for the linear layer ops, so both groups
#sort together in TensorBoard (TensorFlow uniquifies the repeated scope name).
with tf.name_scope('linear_layer_weights') as scope:
    #Projection matrix: [hidden_layer_size, num_of_classes], small random init
    with tf.name_scope('W_linear'):
        Wl = tf.Variable(tf.truncated_normal([hidden_layer_size,num_of_classes],mean=0,stddev=0.01))
        variable_summaries(Wl)
    #Per-class bias: [num_of_classes], small random init
    with tf.name_scope('Bias_linear'):
        bl = tf.Variable(tf.truncated_normal([num_of_classes],mean=0,stddev=0.01))
        variable_summaries(bl)


In [11]:
# Apply the linear layer to a state vector
def get_linear_layer(hidden_state):
    """Return hidden_state.Wl + bl, the output-layer scores for `hidden_state`."""
    projected = tf.matmul(hidden_state, Wl)
    return projected + bl

#tf.map_fn is similar to Python's map function: it applies a function to each
#element of a sequence, here to the hidden state of every time step.
with tf.name_scope('linear_layer_weights') as scope:
    #Iterate across time, applying the linear layer to all RNN outputs
    all_outputs = tf.map_fn(get_linear_layer, all_hidden_states)
    #Only the scores of the final time step are used downstream
    output = all_outputs[-1]
    tf.summary.histogram('outputs', output)


In [12]:
#Loss: softmax cross-entropy between the final-step scores and the labels, averaged over the batch
with tf.name_scope('cross_entropy'):
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y)
    cross_entropy = tf.reduce_mean(per_example_loss)
    tf.summary.scalar('cross_entropy', cross_entropy)

#Optimiser: RMSProp minimising the cross-entropy
with tf.name_scope('train'):
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001, decay=0.9)
    train_step = optimizer.minimize(cross_entropy)

#Accuracy: percentage of examples whose arg-max prediction matches the label
with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(output, axis=1))
    hit_rate = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy = hit_rate * 100
    tf.summary.scalar('accuracy', accuracy)

In [17]:
# Merge all the summaries into a single op that can be fetched in one run call
merged = tf.summary.merge_all()

#Get a small test sample: the first batch_size test images, reshaped into sequences
test_data = mnist.test.images[:batch_size].reshape((-1,time_steps,element_size))
test_label = mnist.test.labels[:batch_size]

with tf.Session() as sess:
    #Separate writers so train and test curves show up as distinct runs in TensorBoard
    train_writer = tf.summary.FileWriter(LOG_DIR+'/train',graph=tf.get_default_graph())
    test_writer = tf.summary.FileWriter(LOG_DIR+'/test',graph = tf.get_default_graph())
    try:
        sess.run(tf.global_variables_initializer())
        for i in range(10000):
            batch_x,batch_y=mnist.train.next_batch(batch_size)
            #Reshape data to get 28 sequences of 28 pixels
            batch_x = batch_x.reshape((batch_size,time_steps,element_size))
            #One optimisation step; the training summaries are logged for every step
            summary,_=sess.run([merged,train_step],feed_dict={_inputs:batch_x,y:batch_y})
            train_writer.add_summary(summary,i)

            if i % 100 ==0:
                #Report loss/accuracy on the batch that was just trained on
                acc,loss = sess.run([accuracy,cross_entropy],feed_dict={_inputs:batch_x,y:batch_y})
                print("Iter "+str(i) + ",Minibatch Loss = {:.6f} + Training Accuracy = {:.5f}".format(loss,acc))
            #Evaluate on the test sample every 10th step. The previous `if i % 10:`
            #was inverted and ran on every step that is NOT a multiple of 10.
            if i % 10 == 0:
                #Calculate the accuracy for 128 MNIST test images and add to summaries
                summary,acc = sess.run([merged,accuracy],feed_dict={_inputs:test_data,y:test_label})
                test_writer.add_summary(summary,i)
        #sess.run with a list of fetches returns a list, so test_acc is a one-element list
        test_acc = sess.run([accuracy],feed_dict={_inputs:test_data,y:test_label})
        print("Test Accuracy: ",test_acc)
    finally:
        #Flush buffered events to disk and release the file handles, even on error
        train_writer.close()
        test_writer.close()

Iter 0,Minibatch Loss = 2.302809 + Training Accuracy = 10.93750
Iter 100,Minibatch Loss = 2.301754 + Training Accuracy = 7.81250
Iter 200,Minibatch Loss = 2.262827 + Training Accuracy = 15.62500
Iter 300,Minibatch Loss = 1.808889 + Training Accuracy = 30.46875
Iter 400,Minibatch Loss = 1.504862 + Training Accuracy = 34.37500
Iter 500,Minibatch Loss = 1.629171 + Training Accuracy = 33.59375
Iter 600,Minibatch Loss = 1.425022 + Training Accuracy = 51.56250
Iter 700,Minibatch Loss = 1.236673 + Training Accuracy = 53.12500
Iter 800,Minibatch Loss = 1.089949 + Training Accuracy = 60.15625
Iter 900,Minibatch Loss = 1.157528 + Training Accuracy = 57.81250
Iter 1000,Minibatch Loss = 1.211760 + Training Accuracy = 60.15625
Iter 1100,Minibatch Loss = 1.131388 + Training Accuracy = 60.93750
Iter 1200,Minibatch Loss = 0.835991 + Training Accuracy = 69.53125
Iter 1300,Minibatch Loss = 0.814610 + Training Accuracy = 72.65625
Iter 1400,Minibatch Loss = 1.052594 + Training Accuracy = 62.50000
Iter 150