# sequence classification with LSTM

In [2]:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  
print ("packages imported")

# Load MNIST from the local data directory with one-hot encoded labels.
mnist = input_data.read_data_sets('../MNIST_data/', one_hot=True)
train_imgs, train_labels = mnist.train.images, mnist.train.labels
test_imgs, test_labels = mnist.test.images, mnist.test.labels
# Sample counts come from the leading dimension of each image array.
# (n_test must be test_imgs.shape[0]; test_imgs[0] would be the first test
# image itself, not a count.)
n_train, n_test = train_imgs.shape[0], test_imgs.shape[0]
# Per-image feature length and number of label columns.
dim, n_classes = train_imgs.shape[1], train_labels.shape[1]
print ("mnist loaded")

packages imported
Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz
mnist loaded


## We will treat each MNIST image $\in \mathbb{R}^{28 \times 28}$ as a sequence of $28$ vectors $\mathbf{x} \in \mathbb{R}^{28}$ (one vector per image row).
## Our simple RNN consists of  
1. One input layer, which maps each $28$-dimensional input to a $128$-dimensional hidden representation,
2. One intermediate recurrent neural network (LSTM),
3. One output layer, which maps the $128$-dimensional output of the LSTM to a $10$-dimensional output indicating the class label.

<img src="../images/etc/rnn_input3.jpg" width="700" height="400" >

## Construct an RNN

In [4]:
# Network geometry: every time step carries `dim_input` values, the sequence
# has `n_step` steps, and the LSTM works on `dim_hidden` features.
n_step = 28
dim_input = 28
dim_hidden = 128
dim_output = n_classes

# Trainable parameters of the two affine layers around the LSTM, keyed by
# layer name; all of them start from random-normal draws.
weights = dict(
    hidden=tf.Variable(tf.random_normal([dim_input, dim_hidden])),
    out=tf.Variable(tf.random_normal([dim_hidden, dim_output])),
)
biases = dict(
    hidden=tf.Variable(tf.random_normal([dim_hidden])),
    out=tf.Variable(tf.random_normal([dim_output])),
)

Instructions for updating:
Colocations handled automatically by placer.


In [5]:
def _RNN(_x, _istate, _w, _b, _nsteps, _name):
    """Build the graph: input projection -> LSTM -> output projection.

    Relies on the module-level `dim_input` and `dim_hidden` for the reshape
    width and the LSTM size.

    Args:
        _x: input tensor; the transpose below treats it as
            [batch_size, n_step, dim_input].
        _istate: initial LSTM state, forwarded unchanged as `initial_state`.
            NOTE(review): the structure it must have depends on the cell's
            state format -- confirm against the caller.
        _w: dict holding the 'hidden' and 'out' weight matrices.
        _b: dict holding the 'hidden' and 'out' bias vectors.
        _nsteps: number of equal chunks the projected input is split into
            along axis 0 (one chunk per time step).
        _name: variable scope in which the LSTM cell is created.

    Returns:
        dict with the intermediate and final tensors:
        'X' (reshaped input), 'h' (projected input), 'hsplit' (per-step
        list), 'LSTM_O' (per-step LSTM outputs), '_LSTM_S' (final LSTM
        state) and 'O' (output computed from the last step).
    """
    # 1. permute input from [batch_size, n_step, dim_input]
    #    => [n_step, batch_size, dim_input]
    _x = tf.transpose(_x, [1, 0, 2])
    # 2. reshape input to [n_step * batch_size, dim_input]
    _x = tf.reshape(_x, [-1, dim_input])
    # 3. input layer => hidden layer
    #    (uses the `_b` argument; the bare name `b` is not defined here)
    _h = tf.matmul(_x, _w['hidden']) + _b['hidden']
    # 4. split data into '_nsteps' chunks along axis 0; because the time axis
    #    was moved to the front in step 1, the i-th chunk is the i-th time step
    #    TF 1.x argument order is (value, num_or_size_splits, axis).
    _h_split = tf.split(_h, _nsteps, axis=0)
    # 5. get the LSTM's outputs (_LSTM_O) and final state (_LSTM_S)
    #    Only the last element of _LSTM_O is used to predict the output.
    with tf.variable_scope(_name):
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(dim_hidden, forget_bias=1.0)
        # tf.nn.rnn was removed in TF 1.0; static_rnn is its replacement.
        _LSTM_O, _LSTM_S = tf.nn.static_rnn(
            lstm_cell, _h_split, initial_state=_istate)
    # 6. output layer applied to the last time step's LSTM output
    _O = tf.add(tf.matmul(_LSTM_O[-1], _w['out']), _b['out'])
    # return every intermediate tensor so callers can inspect them
    return {
        'X': _x, 'h': _h, 'hsplit': _h_split,
        'LSTM_O': _LSTM_O, '_LSTM_S': _LSTM_S, 'O': _O
    }
print("network ready")

network ready
