In [1]:
import tensorflow as tf
import numpy as np

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from ._conv import register_converters as _register_converters
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def reset_graph(seed=1):
    """Reset the TensorFlow default graph and reseed TensorFlow and NumPy.

    Called before each graph-building section so that this notebook's
    output is stable across runs.

    Args:
        seed: Value passed to both `tf.set_random_seed` and `np.random.seed`.
    """
    # NumPy's global RNG does not depend on the TensorFlow graph.
    np.random.seed(seed)
    # Reset first, then seed, so the seed is set on the fresh default graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

## tf implementation of RNN (single cell)

### Computation Graph Construct Phase

The sample code below builds an RNN that is unrolled over 2 time steps, taking an input vector of size 3 at each step.

In [10]:
# Start from a fresh default graph with fixed seeds before building the two-step RNN.
reset_graph()
n_inputs = 3   # size of the input vector fed at each time step
n_neurons = 5  # number of neurons in the recurrent layer

In [11]:
# One placeholder per time step; each holds a batch (first dimension left open)
# of input vectors of size n_inputs.
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

In [12]:
# Input-to-hidden weights, applied to the input at the current time step t.
# The shape must be [n_inputs, n_neurons] so it can be matrix-multiplied with a
# [None, n_inputs] input batch (the original used the undefined name `n_input`).
Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))
# Hidden-to-hidden weights, applied to the layer OUTPUT of the previous time
# step t-1. Its input dimension is therefore the output length of the layer at
# t-1, which is n_neurons.
Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))
# Single bias row shared by all time steps; broadcast over the batch dimension.
b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))

In [13]:
# First time step: there is no previous output, so only the input term is used.
Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
# Second time step: combines the previous output Y0 (through Wy) with the new
# input X1 (through Wx), reusing the same weights and bias.
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)
# Further time steps would follow the same pattern:
# Y2 = tf.tanh(tf.matmul(Y1, Wy) + tf.matmul(X2, Wx) + b)...

In [14]:
# Op that initializes every variable created so far (Wx, Wy and b); run inside a session.
init = tf.global_variables_initializer()

### Graph Execution Phase

First we create some synthetic data to feed through the graph (no training happens here; we only evaluate the outputs of the two time steps)

In [17]:
import numpy as np

# Synthetic mini-batch of 4 instances; every row is one instance's 3-element input.
# Inputs fed at the first time step (placeholder X0)
X0_batch = np.array([
    [0, 1, 2],  # instance_0
    [3, 4, 5],  # instance_1
    [6, 7, 8],  # instance_2
    [9, 0, 1],  # instance_3
])
# Inputs fed at the second time step (placeholder X1)
X1_batch = np.array([
    [9, 8, 7],  # instance_0
    [0, 0, 0],  # instance_1
    [6, 5, 4],  # instance_2
    [3, 2, 1],  # instance_3
])

In [18]:
# Initialize the variables, then evaluate both time steps in a single run call.
with tf.Session() as sess:
    sess.run(init)
    y0_val, y1_val = sess.run(
        [Y0, Y1],
        feed_dict={X0: X0_batch, X1: X1_batch}
    )

In [19]:
# Evaluated Y0: layer outputs at the first time step, one row per instance.
print(y0_val)

[[-0.91210926 -0.97909343 -0.9963769  -0.80419695  0.81554604]
 [-0.9984175  -0.99501723 -0.9999901  -0.999553    0.878944  ]
 [-0.9999729  -0.9988196  -1.         -0.99999917  0.92149407]
 [ 0.9867836   1.          1.         -0.5567072  -0.9989797 ]]


In [20]:
# Evaluated Y1: layer outputs at the second time step, one row per instance.
print(y1_val)

[[-0.9999856   0.8339144  -0.9999801  -0.99999195 -0.97266006]
 [-0.92764604  0.8301138   0.03124466  0.95625156 -0.9378031 ]
 [-0.999539    0.97095305 -0.99141777 -0.9961223  -0.9843221 ]
 [-0.5459391  -0.39203942  0.943575   -0.9999922   0.93447596]]


### There are a few "static" ways to unroll an RNN layer through time, but using `dynamic_rnn` instead is much easier

In [3]:
# Sizes used by the dynamic_rnn example below.
n_steps = 2    # number of time steps in each (padded) sequence
n_inputs = 3   # size of the input vector at each time step
n_neurons = 5  # number of units in the recurrent layer

In [8]:
reset_graph()

# Input sequences with a fixed number of steps and a fixed input size per step:
# [batch, n_steps, n_inputs]
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

# seq_length holds the actual length of each instance, for batches whose
# instances have different lengths (shorter ones are padded up to n_steps)
seq_length = tf.placeholder(tf.int32, [None])

In [9]:
# The cell describes one recurrent layer of n_neurons units; dynamic_rnn
# applies it to X along the time dimension.
basic_cell = tf.keras.layers.SimpleRNNCell(units=n_neurons)
# `outputs` holds the layer output at every time step and `states` holds the
# final state of each instance; sequence_length marks each instance's real length.
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32, sequence_length=seq_length)
init = tf.global_variables_initializer()

In [10]:
# Batch of sequences with different real lengths, each padded to exactly 2 steps.
X_batch = np.array([
    # instance 0
    [[0, 1, 2],   # step 0
     [9, 8, 7]],  # step 1
    # instance 1 (only one real step; step 1 is a zero-vector pad)
    [[3, 4, 5],
     [0, 0, 0]],
    # instance 2
    [[6, 7, 8],
     [6, 5, 4]],
    # instance 3
    [[9, 0, 1],
     [3, 2, 1]],
])
# Real (unpadded) length of each instance, in the same order as X_batch.
seq_length_batch = np.array([2, 1, 2, 2])

In [16]:
# (number of instances, n_steps, n_inputs)
X_batch.shape

(4, 2, 3)

In [11]:
# Initialize the variables and evaluate the RNN on the padded batch.
with tf.Session() as sess:
    init.run()
    # NOTE(review): this rebinds `outputs` and `states` from the graph tensors to
    # the values returned by sess.run; the display cells that follow rely on that.
    # Re-running this cell alone presumably fails unless the graph cells are
    # re-run first — confirm before reordering or re-executing cells.
    outputs, states = sess.run(
        [outputs, states], feed_dict={X:X_batch, seq_length:seq_length_batch}
    )

In [12]:
# Evaluated layer outputs for every instance at every time step.
print(outputs)

[[[ 0.95115983 -0.9610069   0.94697636  0.9527325   0.06647953]
  [ 1.         -0.9999789   0.99999994  1.         -0.61731684]]

 [[ 0.99999803 -0.99981755  0.99987227  0.99999624  0.11031032]
  [ 0.          0.          0.          0.          0.        ]]

 [[ 1.         -0.99999917  0.9999998   1.          0.15371612]
  [ 0.9999994  -0.99534667  0.9999252   0.99999213 -0.6344111 ]]

 [[ 0.9999957   0.9977971   0.3841205   0.99995565 -0.84551716]
  [ 0.9876907   0.79720205  0.78417754  0.99387246 -0.23067977]]]


Note that the first (`batch_size`) and second (`n_steps`) dimensions of the outputs tensor stay the same as in the input after the RNN layer. The third dimension is `n_neurons`. Also note that the output of the padded step (instance 1, step 1) is a zero vector.

In [14]:
# (batch_size, n_steps, n_neurons)
outputs.shape

(4, 2, 5)

In [13]:
# Evaluated final state of each instance, one row per instance.
print(states)

[[ 1.         -0.9999789   0.99999994  1.         -0.61731684]
 [ 0.99999803 -0.99981755  0.99987227  0.99999624  0.11031032]
 [ 0.9999994  -0.99534667  0.9999252   0.99999213 -0.6344111 ]
 [ 0.9876907   0.79720205  0.78417754  0.99387246 -0.23067977]]


Note that `states` holds the neuron outputs at the `last valid step` of each sequence (for the padded instance 1 this is step 0, not the zero-padded step 1); therefore, it has the shape `batch_size` by `n_neurons`

In [15]:
# (batch_size, n_neurons)
states.shape

(4, 5)

## Training a RNN neural network with MNIST

**Note**:
The 28 * 28 input data of MNIST can be processed as 28 steps of 28-element vectors

In [20]:
# Each 28 * 28 image is processed as 28 steps of 28-element vectors.
n_steps = 28     # number of time steps per image
n_inputs = 28    # size of the input vector at each step
n_neurons = 150  # number of units in the recurrent layer
n_outputs = 10   # size of the logits layer

In [21]:
# Learning rate passed to the Adam optimizer when the training op is built.
learning_rate = 0.001

In [25]:
reset_graph()

# Images fed as sequences: [batch, n_steps, n_inputs]
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# One integer class label per instance
y = tf.placeholder(tf.int32, [None])

In [26]:
# Build the classifier graph: RNN layer -> dense layer (logits) -> loss -> training op -> accuracy.
with tf.name_scope('RNN'):
    basic_cell = tf.keras.layers.SimpleRNNCell(units=n_neurons)
    # dynamic_rnn applies the cell to X along the time dimension, carrying the
    # hidden state from one time step to the next
    outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

# Only the final state of the RNN is fed to the fully connected output layer.
logits = tf.layers.dense(states, n_outputs, name="FC")

with tf.name_scope('Loss'):
    # Takes integer class labels and unscaled logits; yields one loss value per instance.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy)

with tf.name_scope('training'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope('validation'):
    # True for each instance whose label is the top-1 scoring class.
    correct = tf.nn.in_top_k(logits, y, 1)
    # Fraction of correct predictions in the evaluated batch.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()

In [27]:
# NOTE(review): this import sits mid-notebook, and the warnings printed below
# report that this MNIST loader is deprecated.
from tensorflow.examples.tutorials.mnist import input_data
# Reads the dataset archives under /tmp/data/ (see the "Extracting ..." lines in the output).
mnist = input_data.read_data_sets('/tmp/data/')
# Reshape the test images to (-1, n_steps, n_inputs), the layout the X placeholder expects.
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

W0822 17:43:49.834811  1652 deprecation.py:323] From <ipython-input-27-296044d6dcb8>:2: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
W0822 17:43:49.835774  1652 deprecation.py:323] From C:\Users\oycy\AppData\Roaming\Python\Python35\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Please write your own downloading logic.
W0822 17:43:49.837770  1652 deprecation.py:323] From C:\Users\oycy\AppData\Roaming\Python\Python35\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version

Extracting /tmp/data/train-images-idx3-ubyte.gz


W0822 17:43:50.173022  1652 deprecation.py:323] From C:\Users\oycy\AppData\Roaming\Python\Python35\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.data to implement this functionality.
W0822 17:43:50.226744  1652 deprecation.py:323] From C:\Users\oycy\AppData\Roaming\Python\Python35\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [28]:
n_epochs = 100
batch_size = 150
# Ceiling division: enough mini-batches to cover every training example once
# per epoch, without the surplus batch that `num_examples // batch_size + 1`
# adds when the dataset size is an exact multiple of batch_size.
n_batches = (mnist.train.num_examples + batch_size - 1) // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # Reshape to (batch, n_steps, n_inputs) to match the X placeholder.
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # "Train accuracy" is measured on the last mini-batch of the epoch only,
        # so it is a noisy estimate; test accuracy uses the whole test set.
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("Epoch: ", epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

Epoch:  0 Train accuracy: 0.94666666 Test accuracy: 0.9318
Epoch:  1 Train accuracy: 0.96 Test accuracy: 0.9522
Epoch:  2 Train accuracy: 0.98 Test accuracy: 0.9649
Epoch:  3 Train accuracy: 0.99333334 Test accuracy: 0.9659
Epoch:  4 Train accuracy: 0.99333334 Test accuracy: 0.9625
Epoch:  5 Train accuracy: 0.97333336 Test accuracy: 0.9654
Epoch:  6 Train accuracy: 0.99333334 Test accuracy: 0.9679
Epoch:  7 Train accuracy: 0.98 Test accuracy: 0.9754
Epoch:  8 Train accuracy: 0.98 Test accuracy: 0.9717
Epoch:  9 Train accuracy: 0.98 Test accuracy: 0.9737
Epoch:  10 Train accuracy: 1.0 Test accuracy: 0.9757
Epoch:  11 Train accuracy: 0.98 Test accuracy: 0.9779
Epoch:  12 Train accuracy: 0.99333334 Test accuracy: 0.9742
Epoch:  13 Train accuracy: 0.9866667 Test accuracy: 0.9704
Epoch:  14 Train accuracy: 0.99333334 Test accuracy: 0.9765
Epoch:  15 Train accuracy: 0.99333334 Test accuracy: 0.9747
Epoch:  16 Train accuracy: 0.99333334 Test accuracy: 0.9733
Epoch:  17 Train accuracy: 0.99333