In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start over with an empty default graph and fixed random seeds.

    Args:
        seed: Value used both for TensorFlow's graph-level seed and for
            NumPy's global random generator.
    """
    np.random.seed(seed)
    # Reset first, then seed: the seed call applies to the current default graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels on every figure.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "rnn"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to
    ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``; the target
    directory is created when it does not exist yet.

    Args:
        fig_id: Base name of the output file, without the ``.png`` extension.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
    """
    out_dir = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # plt.savefig does not create missing folders. The isdir check is used
    # instead of makedirs(exist_ok=True) so this keeps working on Python 2.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    path = os.path.join(out_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [2]:
import tensorflow as tf

In [4]:
# Manually unrolled recurrent layer over two time steps. Both steps share
# the same weights (Wx, Wy) and bias (b).
reset_graph()

n_inputs = 3
n_neurons = 5

# One placeholder per time step; the first dimension is left unspecified (None).
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

# Wx multiplies the current step's inputs, Wy multiplies the previous step's outputs.
Wx = tf.Variable(tf.random_normal(shape = [n_inputs, n_neurons], dtype = tf.float32))
Wy = tf.Variable(tf.random_normal(shape = [n_neurons, n_neurons], dtype = tf.float32))
b = tf.Variable(tf.zeros([1, n_neurons], dtype = tf.float32))

# First step: there is no previous output, so only the input term is used.
Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
# Second step: combines the previous output Y0 with the new input X1.
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

# Op that initializes Wx, Wy and b when run.
init = tf.global_variables_initializer()

In [5]:
import numpy as np

# Mini-batch of four instances with three features each, one array per time step.
X0_batch = np.array([  # t = 0
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
    [9, 0, 1],
])
X1_batch = np.array([  # t = 1
    [9, 8, 7],
    [0, 0, 0],
    [6, 5, 4],
    [3, 2, 1],
])

with tf.Session() as sess:
    sess.run(init)
    # Evaluate both time steps in a single run of the graph.
    Y0_val, Y1_val = sess.run(
        [Y0, Y1],
        feed_dict={X0: X0_batch, X1: X1_batch})

In [6]:
# Outputs at t = 0: one row per instance, one column per neuron.
print(Y0_val)

[[-0.0664006   0.96257669  0.68105793  0.70918542 -0.89821601]
 [ 0.9977755  -0.71978897 -0.99657619  0.96739239 -0.99989718]
 [ 0.99999785 -0.99898815 -0.99999893  0.99677622 -0.99999982]
 [ 1.         -1.         -1.         -0.99818927  0.99950868]]


In [7]:
# Outputs at t = 1: one row per instance, one column per neuron.
print(Y1_val)

[[ 1.         -1.         -1.          0.40200275 -0.99999982]
 [-0.12210425  0.62805289  0.96718431 -0.99371219 -0.25839356]
 [ 0.99999821 -0.9999994  -0.9999975  -0.85943305 -0.9999879 ]
 [ 0.99928284 -0.99999803 -0.99990582  0.98579615 -0.92205751]]


In [8]:
# Number of input features per time step and number of recurrent neurons.
n_inputs = 3
n_neurons = 5

In [10]:
# Two-step network built from a BasicRNNCell and unrolled by static_rnn
# over the Python list [X0, X1] (one tensor per time step).
reset_graph()

X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
# static_rnn returns the list of per-step outputs together with the final state.
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1], dtype = tf.float32)

# Two input tensors were passed, so the output list unpacks into two tensors.
Y0, Y1 = output_seqs

In [11]:
# Op that initializes the graph's global variables when run.
init = tf.global_variables_initializer()

In [12]:
# Mini-batch of four instances with three features each, one array per time step.
X0_batch = np.array([  # t = 0
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
    [9, 0, 1],
])
X1_batch = np.array([  # t = 1
    [9, 8, 7],
    [0, 0, 0],
    [6, 5, 4],
    [3, 2, 1],
])

with tf.Session() as sess:
    sess.run(init)
    # Evaluate both time steps in a single run of the graph.
    Y0_val, Y1_val = sess.run(
        [Y0, Y1],
        feed_dict={X0: X0_batch, X1: X1_batch})

In [13]:
# Outputs at the first time step (one row per instance).
Y0_val

array([[ 0.30741334, -0.32884315, -0.65428466, -0.93850589,  0.52089024],
       [ 0.99122757, -0.95425403, -0.75180787, -0.99952078,  0.98202348],
       [ 0.99992681, -0.99783254, -0.82473528, -0.9999963 ,  0.99947792],
       [ 0.99677098, -0.68750614,  0.84199691,  0.93039113,  0.81206834]], dtype=float32)

In [14]:
# Outputs at the second time step (one row per instance).
Y1_val

array([[ 0.99998897, -0.99976057, -0.0667929 , -0.99998027,  0.99982214],
       [-0.65249431, -0.51520866, -0.37968946, -0.59225935, -0.08968391],
       [ 0.99862397, -0.99715203, -0.03308637, -0.99915659,  0.99329019],
       [ 0.99681675, -0.95981938,  0.3966063 , -0.83076054,  0.79671967]], dtype=float32)

In [3]:
# Time steps per sequence, input features per step, and recurrent neurons.
n_steps = 2
n_inputs = 3
n_neurons = 5

In [4]:
reset_graph()

# A single placeholder holds whole sequences: [batch, n_steps, n_inputs].
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# Swap the first two axes so time comes first, then unstack along that axis
# into a Python list with one tensor per time step.
X_seqs = tf.unstack(tf.transpose(X, perm = [1, 0, 2]))

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs, dtype = tf.float32)

# Stack the per-step outputs and swap the first two axes back, so the batch
# dimension comes first again.
outputs = tf.transpose(tf.stack(output_seqs), perm = [1, 0, 2])

In [5]:
# Op that initializes the graph's global variables when run.
init = tf.global_variables_initializer()

In [6]:
# Four instances, each a sequence of two time steps with three features.
X_batch = np.array([
    # t = 0      t = 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
])

with tf.Session() as sess:
    sess.run(init)
    outputs_val = sess.run(outputs, feed_dict={X: X_batch})

In [7]:
# (instances, time steps, input features)
X_batch.shape

(4, 2, 3)

In [9]:
# One block per instance; each block holds the outputs for t = 0 and t = 1.
print(outputs_val)

[[[-0.45652324 -0.68064123  0.40938237  0.63104504 -0.45732826]
  [-0.94287997 -0.99988699  0.94055808  0.99999839 -0.99999982]]

 [[-0.80015343 -0.99218267  0.78177965  0.99710321 -0.99646091]
  [-0.63711601  0.11300927  0.5798437   0.4310559  -0.6371699 ]]

 [[-0.93605185 -0.99983788  0.93088669  0.9999814  -0.99998307]
  [-0.9165386  -0.99456048  0.89605403  0.99987197 -0.99997509]]

 [[ 0.9927367  -0.99819332 -0.55543643  0.9989031  -0.9953323 ]
  [-0.02746338 -0.73191983  0.7827872   0.95256811 -0.97817701]]]


In [12]:
# Time steps per sequence, input features per step, and recurrent neurons.
n_steps = 2
n_inputs = 3
n_neurons = 5

In [13]:
reset_graph()

# Whole sequences are fed through one placeholder: [batch, n_steps, n_inputs].
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
# dynamic_rnn takes the 3-D tensor directly (no unstack/stack needed) and
# returns the outputs for every step plus the final state.
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype = tf.float32)

In [14]:
# Op that initializes the graph's global variables when run.
init = tf.global_variables_initializer()

In [15]:
# Four instances, each a sequence of two time steps with three features.
X_batch = np.array([
    # t = 0      t = 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
])

with tf.Session() as sess:
    sess.run(init)
    # Fetch the per-step outputs and the final states together.
    outputs_val, states_val = sess.run(
        [outputs, states],
        feed_dict={X: X_batch})

In [16]:
# One block per instance; each block holds the outputs for both time steps.
print(outputs_val)

[[[-0.46430176 -0.04473137  0.45657092 -0.601556   -0.28795418]
  [-0.99999988 -0.97130966  0.99933434 -0.6387586  -0.90433723]]

 [[-0.9983902  -0.71462387  0.95593154 -0.86314547 -0.68196076]
  [ 0.52432048  0.32354674  0.54770964  0.32016146 -0.60165441]]

 [[-0.99999648 -0.94119084  0.99728239 -0.95754659 -0.87851131]
  [-0.99987793 -0.88867241  0.99540216 -0.22945848 -0.91968971]]

 [[-0.99997151 -0.75314504  0.73893744 -0.93929273  0.46864882]
  [-0.96881706 -0.10343011  0.8409428  -0.09726928 -0.28027278]]]


In [17]:
# Final state per instance; here each row matches that instance's output at the last time step.
print(states_val)

[[-0.99999988 -0.97130966  0.99933434 -0.6387586  -0.90433723]
 [ 0.52432048  0.32354674  0.54770964  0.32016146 -0.60165441]
 [-0.99987793 -0.88867241  0.99540216 -0.22945848 -0.91968971]
 [-0.96881706 -0.10343011  0.8409428  -0.09726928 -0.28027278]]


In [4]:
# Time steps per sequence, input features per step, and recurrent neurons.
n_steps = 2
n_inputs = 3
n_neurons = 5

reset_graph()

# Whole sequences are fed through one placeholder: [batch, n_steps, n_inputs].
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)

In [5]:
# 1-D int32 placeholder holding one sequence length per instance in the batch.
seq_length = tf.placeholder(tf.int32, [None])

In [6]:
# Passing sequence_length tells dynamic_rnn how many steps of each instance are real input.
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype = tf.float32, sequence_length = seq_length)

In [7]:
# Op that initializes the graph's global variables when run.
init = tf.global_variables_initializer()

In [8]:
# Four instances of two time steps each. Instance 2 is only one step long,
# so its second step is filled with a zero vector.
X_batch = np.array([
    # step 0     step 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2 (padded with zero vectors)
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
])
# Real (unpadded) length of each instance, in the same order as X_batch.
seq_length_batch = np.array([
    2,
    1,
    2,
    2,
])

In [9]:
with tf.Session() as sess:
    sess.run(init)

    # Feed the padded batch together with the true length of every instance.
    outputs_val, states_val = sess.run(
        [outputs, states],
        feed_dict={X: X_batch, seq_length: seq_length_batch})

In [10]:
# Instance 2 was given sequence length 1, so its output at step 1 is a zero vector.
print(outputs_val)

[[[ 0.65519077 -0.38949275 -0.6105231   0.59276378 -0.48083815]
  [ 0.99999392  0.99594402  0.45825899  0.99916971  0.89911348]]

 [[ 0.99782628  0.44117272 -0.59160739  0.97499579 -0.35678416]
  [ 0.          0.          0.          0.          0.        ]]

 [[ 0.99998862  0.87606186 -0.57201713  0.99874693 -0.21872665]
  [ 0.99956119  0.98087293  0.88909107  0.97684324  0.52034187]]

 [[ 0.99904662  0.92710745  0.23267001  0.9804318   0.99994713]
  [ 0.97913378  0.71947402  0.93568647  0.63649684 -0.1785471 ]]]


In [11]:
# Final state per instance. For instance 2 it equals the step-0 output, i.e.
# the state at the end of its one-step sequence, not at the padded step.
print(states_val)

[[ 0.99999392  0.99594402  0.45825899  0.99916971  0.89911348]
 [ 0.99782628  0.44117272 -0.59160739  0.97499579 -0.35678416]
 [ 0.99956119  0.98087293  0.88909107  0.97684324  0.52034187]
 [ 0.97913378  0.71947402  0.93568647  0.63649684 -0.1785471 ]]


In [21]:
# Sequence classifier: an RNN reads each input sequence and a dense layer
# maps the final state to class logits.
reset_graph()

# NOTE(review): 28 steps x 28 inputs presumably means one 28x28 MNIST image
# read row by row — confirm against the data loading cell.
n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10

learning_rate = 0.001

# X holds the input sequences; y holds one integer class label per instance.
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype = tf.float32)

# Only the final state feeds the output layer; the per-step outputs are unused here.
logits = tf.layers.dense(states, n_outputs)

# The "sparse" variant takes integer class ids as labels (matching y's int32 dtype).
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
loss = tf.reduce_mean(xentropy)

optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)

training_op = optimizer.minimize(loss)

# With k = 1 an instance counts as correct when its label is the top-scoring class.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [22]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from /tmp/data (the logged output shows the archives being downloaded there).
mnist = input_data.read_data_sets('/tmp/data')
# Reshape the test images to [batch, n_steps, n_inputs] so they match placeholder X.
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [23]:
n_epochs = 100
batch_size = 150

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # One pass over the training set, in mini-batches of batch_size.
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # Reshape the batch to [batch, n_steps, n_inputs] to match placeholder X.
            X_batch = X_batch.reshape(-1, n_steps, n_inputs)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Train accuracy is measured on the last mini-batch of the epoch only,
        # test accuracy on the full test set.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(accuracy, feed_dict={X: X_test, y: y_test})

        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

0 Train accuracy: 0.933333 Test accuracy: 0.923
1 Train accuracy: 0.953333 Test accuracy: 0.9533
2 Train accuracy: 0.96 Test accuracy: 0.954
3 Train accuracy: 0.973333 Test accuracy: 0.9528
4 Train accuracy: 0.94 Test accuracy: 0.9637
5 Train accuracy: 0.96 Test accuracy: 0.9675
6 Train accuracy: 0.98 Test accuracy: 0.9731
7 Train accuracy: 0.98 Test accuracy: 0.9683
8 Train accuracy: 0.953333 Test accuracy: 0.9716
9 Train accuracy: 0.966667 Test accuracy: 0.9681
10 Train accuracy: 0.973333 Test accuracy: 0.971
11 Train accuracy: 0.973333 Test accuracy: 0.9679
12 Train accuracy: 0.973333 Test accuracy: 0.972
13 Train accuracy: 0.98 Test accuracy: 0.9773
14 Train accuracy: 0.986667 Test accuracy: 0.9767
15 Train accuracy: 1.0 Test accuracy: 0.9715
16 Train accuracy: 1.0 Test accuracy: 0.9748
17 Train accuracy: 0.993333 Test accuracy: 0.9757
18 Train accuracy: 0.973333 Test accuracy: 0.9718
19 Train accuracy: 0.986667 Test accuracy: 0.979
20 Train accuracy: 0.986667 Test accuracy: 0.9738