In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the MNIST dataset from the local data directory, with labels one-hot encoded.
mnist = input_data.read_data_sets(
    './data/MNIST_data',
    one_hot=True,
)

Extracting ./MNIST_data/train-images-idx3-ubyte.gz
Extracting ./MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Training hyperparameters:
#   lr             - learning rate handed to the optimizer
#   training_iters - training stops once step * batch_size reaches this count
#   batch_size     - number of examples per training batch
lr, training_iters, batch_size = 1e-3, 100000, 128

In [4]:
# Network dimensions.
# Each MNIST image is 28x28; the RNN reads it row by row, so every time step
# receives one row of 28 pixels and a full image takes 28 time steps.
n_inputs = n_steps = 28   # pixels per row / rows per image
n_hidden_units = 128      # size of the hidden layer and of the LSTM cell
n_classes = 10            # number of output classes

In [5]:
# Graph inputs: a batch of images as (batch, steps, inputs) and their one-hot labels.
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, shape=[None, n_classes])

# Projection weights, randomly initialised from a normal distribution.
weights = {}
weights['in'] = tf.Variable(tf.random_normal([n_inputs, n_hidden_units]))    # (28, 128)
weights['out'] = tf.Variable(tf.random_normal([n_hidden_units, n_classes]))  # (128, 10)

# Projection biases, all starting at 0.1.
biases = {}
biases['in'] = tf.Variable(tf.constant(0.1, shape=[n_hidden_units]))  # (128,)
biases['out'] = tf.Variable(tf.constant(0.1, shape=[n_classes]))      # (10,)

In [6]:
# define a model
def RNN(X, weights, biases):
    """Build the LSTM classifier graph and return its output logits.

    Args:
        X: input tensor that can be reshaped to (-1, n_inputs); the notebook
            feeds a (batch, n_steps, n_inputs) placeholder.
        weights: dict with an 'in' matrix (n_inputs, n_hidden_units) and an
            'out' matrix (n_hidden_units, n_classes).
        biases: dict with an 'in' vector (n_hidden_units,) and an 'out'
            vector (n_classes,).

    Returns:
        Tensor of shape (batch, n_classes) holding unnormalised class scores
        computed from the LSTM's final hidden state.
    """
    # flatten to (batch * steps, inputs) so a single matmul projects every time step
    X = tf.reshape(X, [-1, n_inputs])
    # input projection ==> (batch * steps, hidden)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # restore the sequence layout ==> (batch, steps, hidden)
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
    # basic LSTM unit; its state is a (c_state, h_state) tuple
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    # No explicit initial_state: passing dtype makes dynamic_rnn create the zero
    # state from the runtime batch dimension, so the graph is not tied to the
    # training batch size and can also be run on batches of any other size.
    _, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, dtype=tf.float32, time_major=False)

    # classify from the hidden state (h) left after the last time step
    results = tf.matmul(final_state.h, weights['out']) + biases['out']
    return results

In [7]:
# Build the model output, the training loss and the optimisation step.
pred = RNN(x, weights, biases)

# softmax cross-entropy between logits and one-hot labels, averaged into a scalar
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)
cost = tf.reduce_mean(per_example_loss)

# Adam update that minimises the mean loss
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.minimize(cost)

In [8]:
# Accuracy: fraction of examples whose highest-scoring class matches the label.
predicted_labels = tf.argmax(pred, axis=1)
true_labels = tf.argmax(y, axis=1)
correct_pred = tf.equal(predicted_labels, true_labels)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [9]:
# Train until roughly `training_iters` examples have been consumed, printing the
# accuracy on the current training batch every 20 steps.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    step = 0
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # lay each image out as (steps, inputs) to match the `x` placeholder
        batch_xs = batch_xs.reshape((batch_size, n_steps, n_inputs))
        feed = {x: batch_xs, y: batch_ys}
        sess.run(train_op, feed_dict=feed)
        if not step % 20:
            # evaluated after the update, on the same batch that was just trained on
            print(sess.run(accuracy, feed_dict=feed))
        step += 1

0.171875
0.75
0.757812
0.789062
0.875
0.804688
0.859375
0.890625
0.914062
0.90625
0.914062
0.898438
0.882812
0.9375
0.914062
0.921875
0.929688
0.953125
0.929688
0.945312
0.921875
0.945312
0.976562
0.976562
0.914062
0.96875
0.96875
0.960938
0.976562
0.953125
0.976562
0.96875
0.953125
0.921875
0.96875
0.976562
0.96875
0.960938
0.976562
0.984375
