In [3]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
from tensorflow.examples.tutorials.mnist import input_data

# Sequence layout: every image is presented to the RNN as TIME_STEP_SIZE
# consecutive vectors, each holding INPUT_VEC_SIZE values.
TIME_STEP_SIZE = 28
INPUT_VEC_SIZE = 28

# Number of images per training step, and number of test images sampled
# for each accuracy report.
BATCH_SIZE = 128
TEST_SIZE = 256

In [4]:
# configuration
#                        O * W + b -> 10 labels for each image, O[? 28], W[28 10], B[10]
#                       ^ (O: output 28 vec from 28 vec input)
#                       |
#      +-+  +-+       +--+
#      |1|->|2|-> ... |28| time_step_size = 28
#      +-+  +-+       +--+
#       ^    ^    ...  ^
#       |    |         |
# img1:[28] [28]  ... [28]
# img2:[28] [28]  ... [28]
# img3:[28] [28]  ... [28]
# ...
# img128 or img256 (batch_size = 128 when training, test_size = 256 when evaluating)
#      each input size = input_vec_size=lstm_size=28

# configuration variables

In [5]:
# Example 1: the transpose + reshape applied to a batch before it is split
# into per-time-step inputs.
# Build all three tensors first, then show how the shape changes.
a = tf.Variable(tf.random_normal((128, 28, 28), stddev=0.01))
a_t = tf.transpose(a, perm=[1, 0, 2])   # swap the first two axes
a_r = tf.reshape(a_t, shape=[-1, 28])   # merge the first two axes into one

print(a.shape)
print(a_t.shape)
print(a_r.shape)

#example 2

# 'value' is a tensor with shape [5, 30]
# Split 'value' into 3 tensors with sizes [4, 15, 11] along dimension 1
# split0, split1, split2 = tf.split(value, [4, 15, 11], 1)
# tf.shape(split0)  # [5, 4]
# tf.shape(split1)  # [5, 15]
# tf.shape(split2)  # [5, 11]

(128, 28, 28)
(28, 128, 28)
(3584, 28)


In [6]:
#example 3

#BasicLSTMCell

#lstm cell zero
#time step zero
#128 vectors of size 28x1

#################################
#                               #
#                               #
#                               #
#                               #
#                               #
#                               #
#################################
#  ^         ^       ...     ^
#  |         |               |
# img1:[1] img2:[1]  ... img:128[1]
#      [1]      [1]  ...        [1]
#      [1]      [1]  ...        [1]


#example 4
#https://www.tensorflow.org/api_docs/python/tf/nn/static_rnn
#tf.nn.static_rnn()
# The simplest form of RNN network generated is:

#   state = cell.zero_state(...)
#   outputs = []
#   for input_ in inputs:
#     output, state = cell(input_, state)
#     outputs.append(output)
#   return (outputs, state)

In [59]:
def get_weight(shape):
    """Create a trainable variable of the given shape.

    The variable is initialised with samples from a normal distribution
    with standard deviation 0.01.
    """
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)

def model(X, W, b, input_vec_size, lstm_size=None, time_step_size=None):
    """Build a single-layer LSTM classifier and return its logits.

    The sequence in ``X`` is unrolled with ``static_rnn``; only the output of
    the last time step is projected through ``W`` and ``b``.

    Args:
        X: input tensor of shape (batch_size, time_step_size, input_vec_size).
        W: projection weights; must have shape (lstm_size, num_labels) so it
            can be multiplied with the LSTM output.
        b: projection bias, broadcast-added to the projected output.
        input_vec_size: length of the vector fed to the LSTM at each step.
        lstm_size: number of units in the LSTM cell. Defaults to
            ``input_vec_size`` (the original behaviour).
        time_step_size: number of time steps in ``X``. Defaults to the
            notebook-level ``TIME_STEP_SIZE``.

    Returns:
        Tensor of shape (batch_size, num_labels) holding unnormalised scores.
    """
    if lstm_size is None:
        lstm_size = input_vec_size
    if time_step_size is None:
        time_step_size = TIME_STEP_SIZE

    # X: (batch_size, time_step_size, input_vec_size)
    XT = tf.transpose(X, [1, 0, 2])
    # XT: (time_step_size, batch_size, input_vec_size)

    # Flatten by the *input* width. The original reshaped by lstm_size, which
    # only worked because lstm_size happened to equal input_vec_size.
    XR = tf.reshape(XT, [-1, input_vec_size])
    # XR: (time_step_size * batch_size, input_vec_size)

    # One (batch_size, input_vec_size) tensor per time step.
    X_split = tf.split(XR, time_step_size, 0)

    # LSTM cell with lstm_size units.
    lstm = tf.nn.rnn_cell.LSTMCell(lstm_size,
                                   forget_bias=1.0,
                                   state_is_tuple=True)

    # outputs: list of time_step_size tensors, each (batch_size, lstm_size).
    outputs, _states = rnn.static_rnn(lstm,
                                      X_split,
                                      dtype=tf.float32)

    # Linear projection of the last time step's output.
    return tf.matmul(outputs[-1], W) + b

In [55]:
# Load MNIST from MNIST_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY = mnist.train.images, mnist.train.labels
teX, teY = mnist.test.images, mnist.test.labels

# View every image as TIME_STEP_SIZE vectors of INPUT_VEC_SIZE values,
# which is the (batch, time, input) layout model() expects.
trX = trX.reshape(-1, TIME_STEP_SIZE, INPUT_VEC_SIZE)
teX = teX.reshape(-1, TIME_STEP_SIZE, INPUT_VEC_SIZE)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [56]:
# Clear the default graph before (re)building the model.
tf.reset_default_graph()

# Inputs: image sequences and their one-hot labels.
X = tf.placeholder(tf.float32, shape=[None, TIME_STEP_SIZE, INPUT_VEC_SIZE])
Y = tf.placeholder(tf.float32, shape=[None, 10])

# Output projection: LSTM output (lstm_size = INPUT_VEC_SIZE) -> 10 labels.
W = get_weight([INPUT_VEC_SIZE, 10])
b = get_weight([10])

py_x = model(X, W, b, input_vec_size=INPUT_VEC_SIZE)

In [57]:
# Mean softmax cross-entropy between the logits and the one-hot labels.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y,
                                                              logits=py_x)
cost = tf.reduce_mean(per_example_loss)

# One RMSProp update step on the loss.
train_op = tf.train.RMSPropOptimizer(learning_rate=0.001,
                                     decay=0.9).minimize(cost)

# Predicted label = index of the largest logit along axis 1.
predict_op = tf.argmax(py_x, axis=1)

In [58]:
# Train for 10 passes over the training set; after each pass report accuracy
# on a random sample of TEST_SIZE test images.
with tf.Session() as sess:
    tf.global_variables_initializer().run()

    for i in range(10):
        # Iterate over batch starts only and let slicing clamp the end.
        # The previous zip of start/end ranges excluded len(trX) from the end
        # range, so the final batch was never used for training.
        for start in range(0, len(trX), BATCH_SIZE):
            end = start + BATCH_SIZE
            sess.run(train_op, feed_dict={X: trX[start:end],
                                          Y: trY[start:end]})

        # Draw a fresh random subset of the test set for this evaluation.
        test_indeces = np.arange(len(teX))
        np.random.shuffle(test_indeces)

        batch_teX = teX[test_indeces[:TEST_SIZE]]
        batch_teY = teY[test_indeces[:TEST_SIZE]]

        # predict_op is computed from X alone, so labels are not fed here.
        pred_val = sess.run(predict_op, feed_dict={X: batch_teX})
        Y_norm = np.argmax(batch_teY, 1)  # one-hot labels -> class indices
        accuracy_val = np.mean(pred_val == Y_norm)
        print("Accuracy on step {} is {}".format(i, accuracy_val))

Accuracy on step 0 is 0.66015625
Accuracy on step 1 is 0.796875
Accuracy on step 2 is 0.8828125
Accuracy on step 3 is 0.90625
Accuracy on step 4 is 0.9375
Accuracy on step 5 is 0.93359375
Accuracy on step 6 is 0.9375
Accuracy on step 7 is 0.953125
Accuracy on step 8 is 0.9296875
Accuracy on step 9 is 0.9609375
