# Overview of LSTMs with TensorFlow

This work closely follows the tutorial at http://adventuresinmachinelearning.com/recurrent-neural-networks-lstm-tutorial-tensorflow/ and the aymericdamien tutorial.

Outline of this notebook:

1._ One LSTM (one layer)

2._ Two layers of independent LSTMs stacked one after the other

3._ Stacked-cell LSTM

4._ LSTMs with dropout

In [7]:
### Imports :
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn

# Import MNIST data.
# The dataset files live under /tmp/data/; one_hot=True is requested so that the
# labels match the [None, num_classes] label placeholders built later on.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [8]:
# ---- Training hyper-parameters ----
learning_rate = 1e-3     # step size passed to the optimizer
training_steps = 10000   # number of iterations of the training loop
batch_size = 128         # examples drawn per training step
display_step = 200       # report loss/accuracy every `display_step` steps

# ---- Network hyper-parameters ----
# Each 28*28 MNIST image is read as `timesteps` rows of `num_input` pixels.
timesteps, num_input = 28, 28
num_hidden = 128         # size of the LSTM hidden layer
num_classes = 10         # MNIST total classes (0-9 digits)

## 1._ Simple model using one layer of LSTM (or GRU/RNN):

In [9]:
# Graph inputs, fed at run time:
#   X -> batch of sequences, each made of `timesteps` rows of `num_input` values
#   Y -> batch of label rows, each holding `num_classes` values
# The leading None leaves the batch dimension unspecified.
X = tf.placeholder(tf.float32, shape=[None, timesteps, num_input])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

In [10]:
# Parameters of the final linear layer that maps a num_hidden-sized LSTM
# output to num_classes logits; both are initialised from a random normal.
weights = {
    'out': tf.Variable(tf.random_normal(shape=[num_hidden, num_classes])),
}
biases = {
    'out': tf.Variable(tf.random_normal(shape=[num_classes])),
}

"""
    ************************ LSTM_onelayer ************************************************************
    
    ## Inputs : x       = data, 
                weights = weights to compute the logits at the end
                biases  = biases to compute the logits at the end
                num_hidden = dimension of the hidden layer
                timesteps = length of the input timeseries
                
    ## Performance :
    
            - execute one LSTM layer, takes only the last element of the hidden layer to compute the logits.
            
    NB : can change the lstm cell to GRU cell or RNN cell..
"""
def LSTM_onelayer(x, weights, biases, num_hidden, timesteps):
    """Run one LSTM layer over `x` and project its last output to logits.

    Args:
        x: input tensor; it is unstacked along axis 1 into `timesteps`
            slices (the notebook feeds (batch_size, timesteps, n_input)).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.
        num_hidden: number of units of the LSTM cell.
        timesteps: number of slices `x` is unstacked into.

    Returns:
        The tensor matmul(last_output, weights['out']) + biases['out'],
        where last_output is the LSTM output at the final timestep.
    """
    # static_rnn expects a Python list holding one tensor per timestep.
    step_inputs = tf.unstack(x, num=timesteps, axis=1)

    # NB: this cell can be swapped for a GRU cell or a plain RNN cell.
    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # One output per input timestep; the final state is not needed here.
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear read-out applied to the output of the last timestep only.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']



## Training process for the one-layer LSTM:
# build the graph (logits, loss, optimizer, accuracy), train on mini-batches,
# then report accuracy on the first 128 test images.
logits = LSTM_onelayer(X, weights, biases, num_hidden, timesteps)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer.
# The _v2 op replaces the deprecated softmax_cross_entropy_with_logits. It lets
# gradients flow into `labels`, but Y is a fed placeholder, so the set of
# trained variables is unchanged.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model: fraction of examples whose arg-max prediction matches the
# arg-max of the one-hot label (this model contains no dropout).
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, training_steps+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 seq of 28 elements; -1 infers the batch
        # dimension, matching how the test data is reshaped below.
        batch_x = batch_x.reshape((-1, timesteps, num_input))
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                 Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))

    
## Training interrupted!

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Step 1, Minibatch Loss= 2.6234, Training Accuracy= 0.109
Step 200, Minibatch Loss= 2.0708, Training Accuracy= 0.344
Step 400, Minibatch Loss= 1.9664, Training Accuracy= 0.367
Step 600, Minibatch Loss= 1.7729, Training Accuracy= 0.422
Step 800, Minibatch Loss= 1.6922, Training Accuracy= 0.516
Step 1000, Minibatch Loss= 1.5825, Training Accuracy= 0.523
Step 1200, Minibatch Loss= 1.4029, Training Accuracy= 0.602


KeyboardInterrupt: 

## 2._ Two layers of LSTM :

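First layer : LSTM1 with output size num_hidden1, producing output1 (a list with one tensor per timestep).

Second layer : LSTM2 with output size num_hidden2, taking output1[-num_hidden2:] as input. Because output1 is indexed by timestep, this slice keeps the last num_hidden2 timesteps, i.e. the whole sequence here (28 timesteps, num_hidden2 = 50).

Returns output2.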

In [19]:
# Hidden size of the second LSTM layer; it also fixes the number of rows of
# the output projection defined below.
num_hidden2 = 50

# Graph inputs (batch dimension left unspecified).
X = tf.placeholder(tf.float32, shape=[None, timesteps, num_input])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# Output layer mapping a num_hidden2-sized vector to num_classes logits.
weights = {
    'out': tf.Variable(tf.random_normal(shape=[num_hidden2, num_classes])),
}
biases = {
    'out': tf.Variable(tf.random_normal(shape=[num_classes])),
}

"""
    ************************ LSTM Multilayer ************************************************
    
    First Layer : LSTM (hidden dim = num_hidden1), output=outputs_1
    Second Layer : LSTM (hidden dim = num_hidden2), output=outputs_2
    returns outputs_2
"""

def LSTM_multilayer(x, weights, biases, timesteps, num_hidden1=100, num_hidden2=50):
    """Run two LSTM layers one after the other, each in its own variable scope.

    Args:
        x: input tensor; it is unstacked along axis 1 into `timesteps`
            slices (the notebook feeds (batch_size, timesteps, n_input)).
        weights: not used by this function; kept in the signature so
            existing calls keep working.
        biases: not used by this function; kept for the same reason.
        timesteps: number of slices `x` is unstacked into.
        num_hidden1: number of units of the first LSTM cell.
        num_hidden2: number of units of the second LSTM cell.

    Returns:
        (outputs_2, states_2) as returned by static_rnn for the second layer:
        the list of per-timestep outputs and the final state.
    """
    # Unstack to get a list of 'timesteps' tensors, one per timestep.
    x = tf.unstack(x, timesteps, 1)

    # First layer of LSTM; its final state is not needed.
    with tf.variable_scope('lstm1', reuse=tf.AUTO_REUSE):
        lstm_cell_1 = rnn.BasicLSTMCell(num_hidden1, forget_bias=1.0)
        outputs_1, _ = rnn.static_rnn(lstm_cell_1, x, dtype=tf.float32)

    # Second layer consumes the full output sequence of the first one.
    # outputs_1 is a list indexed by timestep, so it must not be sliced by a
    # hidden-layer size: doing so would drop timesteps whenever
    # num_hidden2 < timesteps. The size of layer 2 is set by its cell alone.
    with tf.variable_scope('lstm2', reuse=tf.AUTO_REUSE):
        lstm_cell_2 = rnn.BasicLSTMCell(num_hidden2, forget_bias=1.0)
        outputs_2, states_2 = rnn.static_rnn(lstm_cell_2, outputs_1, dtype=tf.float32)

    return outputs_2, states_2

### See running version in Multilayer_LSTM script

## 3._ Stacked-cells LSTM :

One big cell that wraps several LSTM cells stacked on top of each other (built with `MultiRNNCell`).

In [21]:
# Clear the default graph so the ops created by earlier cells do not linger.
tf.reset_default_graph()

def LSTM_multicell(x, timesteps, num_layers=2, num_hidden=128):
    """Run `x` through a MultiRNNCell made of `num_layers` LSTM cells.

    Args:
        x: input tensor; it is unstacked along axis 1 into `timesteps`
            slices (the notebook feeds (batch_size, timesteps, n_input)).
        timesteps: number of slices `x` is unstacked into.
        num_layers: how many BasicLSTMCell instances are stacked.
        num_hidden: number of units of every stacked cell.

    Returns:
        (outputs, states) as returned by static_rnn for the stacked cell.
    """
    # static_rnn expects a Python list holding one tensor per timestep.
    step_inputs = tf.unstack(x, num=timesteps, axis=1)

    with tf.variable_scope('lstm1', reuse=tf.AUTO_REUSE):
        # One independent cell object per layer, all of the same size.
        layers = [rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
                  for _ in range(num_layers)]
        stacked_cell = tf.nn.rnn_cell.MultiRNNCell(layers)

    # NOTE(review): static_rnn runs outside the 'lstm1' scope above; confirm
    # whether that scope was meant to cover the variables created when the
    # cell is first called.
    outputs, states = rnn.static_rnn(stacked_cell, step_inputs, dtype=tf.float32)
    return outputs, states

In [22]:
import numpy as np
# Batch size used for the single forward pass below
batch_size = 128

# Network parameters (same values as at the top of the notebook)
num_input = 28    # MNIST data input (img shape: 28*28)
timesteps = 28    # timesteps
num_classes = 10  # MNIST total classes (0-9 digits)

# Build the stacked-cell graph on a fresh input placeholder.
X = tf.placeholder(tf.float32, shape=[None, timesteps, num_input])
outputs, _ = LSTM_multicell(X, timesteps)

# Op that initializes every variable created above
init = tf.global_variables_initializer()

# No training happens here: one batch is pushed through the network only to
# inspect the shape of what LSTM_multicell returns.
with tf.Session() as sess:
    sess.run(init)

    batch_x, batch_y = mnist.train.next_batch(batch_size)
    # Reshape data to get 28 seq of 28 elements
    batch_x = batch_x.reshape(batch_size, timesteps, num_input)
    # Forward pass only: fetch the per-timestep outputs
    l = sess.run(outputs, feed_dict={X: batch_x})
    print(np.asarray(l).shape)
        

(28, 128, 128)


## 4._ Dropout 

In [25]:
"""
cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=input_dropout, output_keep_prob=output_dropout)
cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
"""

'\ncell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)\ncell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=input_dropout, output_keep_prob=output_dropout)\ncell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)\n'

In [23]:
# Clear the default graph so the ops created by earlier cells do not linger.
tf.reset_default_graph()

def LSTM_multicell_DropOut(x, weights, biases, timesteps=28, num_layers=2, num_hidden=128,
                           output_keep_prob=0.5):
    """Run `x` through stacked LSTM cells, each wrapped with output dropout.

    Args:
        x: input tensor; it is unstacked along axis 1 into `timesteps`
            slices (the notebook feeds (batch_size, timesteps, n_input)).
        weights: not used by this function; kept in the signature so
            existing calls keep working.
        biases: not used by this function; kept for the same reason.
        timesteps: number of slices `x` is unstacked into.
        num_layers: how many dropout-wrapped BasicLSTMCell instances to stack.
        num_hidden: number of units of every stacked cell.
        output_keep_prob: keep probability handed to DropoutWrapper for the
            output of every layer. Defaults to 0.5, the value that used to be
            hard-coded. Passing 1.0 or a placeholder lets the caller turn
            dropout off at evaluation time.

    Returns:
        (outputs, states) as returned by static_rnn for the stacked cell.
    """
    # Unstack to get a list of 'timesteps' tensors, one per timestep.
    x = tf.unstack(x, timesteps, 1)

    with tf.variable_scope('lstm1', reuse=tf.AUTO_REUSE):
        # Each layer is a fresh LSTM cell with dropout applied to its output.
        cell_ = [
            tf.nn.rnn_cell.DropoutWrapper(
                rnn.BasicLSTMCell(num_hidden, forget_bias=1.0),
                output_keep_prob=output_keep_prob)
            for _ in range(num_layers)
        ]
        # Construct a RNN network with multiple layers (num_layers)
        cell = tf.nn.rnn_cell.MultiRNNCell(cell_)

    outputs, states = rnn.static_rnn(cell, x, dtype=tf.float32)

    return outputs, states

In [None]:
## Training of the stacked LSTM with dropout:
# build the graph, train for 2000 steps on mini-batches, then report accuracy
# on the first 128 test images.

X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])
weights = {'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal([num_classes]))}

outputs, _ = LSTM_multicell_DropOut(X, weights, biases, timesteps)
# Logits are computed from the output of the last timestep only.
logits = tf.matmul(outputs[-1], weights['out']) + biases['out']
prediction = tf.nn.softmax(logits)

# Define loss and optimizer.
# The _v2 op replaces the deprecated softmax_cross_entropy_with_logits. It lets
# gradients flow into `labels`, but Y is a fed placeholder, so the set of
# trained variables is unchanged.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model.
# NOTE: the network built above applies output dropout with keep probability
# 0.5 inside the graph, so dropout is also active whenever `accuracy` is
# evaluated below (training batches and test images alike).
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, 2001):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 seq of 28 elements; -1 infers the batch
        # dimension, matching how the test data is reshaped below.
        batch_x = batch_x.reshape((-1, timesteps, num_input))
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                 Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))

Step 1, Minibatch Loss= 3.2190, Training Accuracy= 0.094
Step 200, Minibatch Loss= 2.2503, Training Accuracy= 0.148


In [17]:
## Check dimension of Multicell_LSTM :
import numpy as np
# Rebuild the dropout network on an empty graph, only to inspect output shapes.
tf.reset_default_graph()
X = tf.placeholder("float", [None, timesteps, num_input])
weights = {'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal([num_classes]))}

outputs, _ = LSTM_multicell_DropOut(X, weights, biases, timesteps)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # One dummy example whose shape is derived from the same constants as the
    # placeholder, so the check keeps working if those constants change.
    out = sess.run(outputs, feed_dict={X: np.ones((1, timesteps, num_input))})

## shape = (number of timesteps, batch size, dimension of the hidden layer)
print(np.shape(out))

(28, 1, 128)
