## Basic RNNs in TensorFlow --Chp 14 from hands-on-ML

### A Very Basic RNN

We implement the following RNN without using any TnesorFlow built-in fucntions

![A Basic RNN](5cell_RNN.png)



In [1]:
import tensorflow as tf
import numpy as np

In [75]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [76]:
reset_graph()
n_inputs = 3
n_neurons = 5

X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))
Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))
b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))

Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

init = tf.global_variables_initializer()

In [77]:
# Mini-batch:        instance 0,instance 1,instance 2,instance 3
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1

with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})

In [78]:
print("vals for t = 0\n ")
print(Y0_val)  # output at t = 0
print("\n Vals for t=1\n")
print(Y1_val)  # output at t = 1

vals for t = 0
 
[[-0.0664006   0.9625767   0.68105793  0.7091854  -0.898216  ]
 [ 0.9977755  -0.719789   -0.9965761   0.9673924  -0.9998972 ]
 [ 0.99999774 -0.99898803 -0.9999989   0.9967762  -0.9999999 ]
 [ 1.         -1.         -1.         -0.99818915  0.9995087 ]]

 Vals for t=1

[[ 1.         -1.         -1.          0.4020025  -0.9999998 ]
 [-0.12210421  0.6280527   0.9671843  -0.9937122  -0.25839362]
 [ 0.9999983  -0.9999994  -0.9999975  -0.8594331  -0.9999881 ]
 [ 0.99928284 -0.99999815 -0.9999058   0.9857963  -0.92205757]]


### Static Unrolling Through Time
The __static_rnn()__ function creates an unrolled RNN network by chaining cells. The following code creates the exact same model as the previous one:

In [79]:
reset_graph()
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],
                                                dtype=tf.float32)
Y0, Y1 = output_seqs


## Dynamic Unrolling Through Time

In [80]:
reset_graph()


n_steps = 3
#n_inputs =

In [81]:
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

## Handling Variable length Input

In [92]:
reset_graph()

n_inputs = 3
n_neurons = 5
n_steps = 2
seq_length = tf.placeholder(tf.int32, [None])

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)
init = tf.global_variables_initializer()

In [98]:
X_manual = np.array([
        # step 0     step 1
        [[0, 1, 2], [9, 8, 7]], # instance 0
        [[3, 4, 5], [0, 0, 0]], # instance 1 (padded with a zero vector)
        [[6, 7, 8], [6, 5, 4]], # instance 2
        [[9, 0, 1], [3, 2, 1]], # instance 3
    ])

seq_length_batch = np.array([3, 1, 2, 2])

with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run(
        [outputs, states], feed_dict={X: X_manual, seq_length: seq_length_batch})
    
print(states_val)
print("\n ############## \n This is the outputs_vals \n##############\n ")
print(outputs_val)

[[ 0.9999941   0.99594396  0.4582593   0.9991697   0.8991136 ]
 [ 0.9978262   0.44117272 -0.5916074   0.9749958  -0.35678416]
 [ 0.9995612   0.9808731   0.8890911   0.9768431   0.5203418 ]
 [ 0.9791339   0.719474    0.93568635  0.63649696 -0.17854698]]

 ############## 
 This is the outputs_vals 
##############
 
[[[ 0.65519077 -0.38949275 -0.6105231   0.5927638  -0.48083815]
  [ 0.9999941   0.99594396  0.4582593   0.9991697   0.8991136 ]]

 [[ 0.9978262   0.44117272 -0.5916074   0.9749958  -0.35678416]
  [ 0.          0.          0.          0.          0.        ]]

 [[ 0.9999887   0.87606186 -0.5720173   0.99874693 -0.21872665]
  [ 0.9995612   0.9808731   0.8890911   0.9768431   0.5203418 ]]

 [[ 0.9990465   0.92710733  0.23267001  0.9804318   0.99994713]
  [ 0.9791339   0.719474    0.93568635  0.63649696 -0.17854698]]]


## Building An Image Classifier Using RNN and MNIST Data

We start with loading the data

### Constructing the graph using RNN

In [18]:
reset_graph()

n_steps = 28 
n_inputs = 28
n_neurons = 150
n_outputs = 10

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

# Next line can be 
#lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True) 
#or could be 
#tf.contrib.rnn.GRUCell(num_units=n_neurons) # 
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons) 
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

logits = tf.layers.dense(states, n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

### Loading the MNIST Data

In [19]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs)) # -1 means python or np will figure out the new dimensions
y_test = mnist.test.labels

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [30]:
X_test.shape, mnist.test.images.shape, mnist.train.images.shape

((10000, 28, 28), (10000, 784), (55000, 784))

In [34]:
28*28

784

In [20]:
n_epochs = 10
batch_size = 150

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

0 Train accuracy: 0.93333334 Test accuracy: 0.918
1 Train accuracy: 0.9533333 Test accuracy: 0.9449
2 Train accuracy: 0.94666666 Test accuracy: 0.9506
3 Train accuracy: 0.96 Test accuracy: 0.9502
4 Train accuracy: 0.96 Test accuracy: 0.9619
5 Train accuracy: 0.9533333 Test accuracy: 0.966
6 Train accuracy: 0.97333336 Test accuracy: 0.9659
7 Train accuracy: 0.98 Test accuracy: 0.9712
8 Train accuracy: 0.96 Test accuracy: 0.9671
9 Train accuracy: 0.97333336 Test accuracy: 0.9733


### Using LSTM

In [66]:
reset_graph()

n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True) 
outputs, states = tf.nn.dynamic_rnn(lstm_cell, X, dtype=tf.float32)

logits = tf.layers.dense(states[0], n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [32]:
n_epochs = 3
batch_size = 150

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

0 Train accuracy: 0.91333336 Test accuracy: 0.948
1 Train accuracy: 0.98 Test accuracy: 0.9555
2 Train accuracy: 0.98 Test accuracy: 0.9687
