# Workshop DL03: Recurrent Neural Networks

## Recurrent Neural Networks
* http://adventuresinmachinelearning.com/recurrent-neural-networks-lstm-tutorial-tensorflow/
* http://karpathy.github.io/2015/05/21/rnn-effectiveness/
* https://gist.github.com/karpathy/d4dee566867f8291f086#file-min-char-rnn-py-L20
* http://karpathy.github.io/2015/05/21/rnn-effectiveness/

## Workshop Exercises

1. Work through the MNIST RNN example; make sure that you understand the meaning of **SEQUENCE_LEN**, **ELEMENT_LEN**, **N_TRAINING_EPOCHS**, and **TRAINING_BATCH_SIZE**. Also pay very close attention to the **reshape** and **transpose** steps in the first `variable_scope` block.
2. Find your own sequential data (e.g. a text corpus), or just use "the-last-question.txt", and try to recreate Karpathy's min-char-rnn for that text in TensorFlow.

## Worked Example: MNIST RNN

In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib import rnn

  from ._conv import register_converters as _register_converters


In [8]:
# load the MNIST data sets from /tmp/data; one_hot=True requests one-hot
# encoded labels
mnist = input_data.read_data_sets("/tmp/data", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [19]:
# each flat 784-value input row is reshaped below into SEQUENCE_LEN steps of
# ELEMENT_LEN values each (28 * 28 = 784)
SEQUENCE_LEN = 28
ELEMENT_LEN = 28

# width of the label placeholder and of the logits
N_CLASSES = 10

# number of outer training-loop iterations, and number of examples fetched
# per optimizer step
N_TRAINING_EPOCHS = 10
TRAINING_BATCH_SIZE = 128

# size passed to BasicLSTMCell; also the first dimension of the output weights
N_LSTM_CELLS = 150

In [23]:
# start from an empty default graph so that re-running the graph-building
# cells does not clash with variables created by an earlier run
tf.reset_default_graph()

In [24]:
# Turn each flat 784-value row into a list of SEQUENCE_LEN per-step tensors,
# which is what rnn.static_rnn is given as input below.
with tf.variable_scope("input") as scope:
    X = tf.placeholder(tf.float32, shape=[None, 784])
    # (BATCH_LEN, 784) -> (BATCH_LEN, SEQUENCE_LEN, ELEMENT_LEN)
    X_reshape = tf.reshape(X, [-1, SEQUENCE_LEN, ELEMENT_LEN])
    
    # make the primary axis SEQUENCE_LEN:
    # (BATCH_LEN, SEQUENCE_LEN, ELEMENT_LEN) -> (SEQUENCE_LEN, BATCH_LEN, ELEMENT_LEN)
    X_transpose = tf.transpose(X_reshape, [1,0,2])
    
    # split the (SEQUENCE_LEN, BATCH_LEN, ELEMENT_LEN) tensor into
    # a list of SEQUENCE_LEN tensors of shape (BATCH_LEN, ELEMENT_LEN):
    # first flatten to (SEQUENCE_LEN * BATCH_LEN, ELEMENT_LEN), where the rows
    # of one time step are contiguous, then cut along axis 0
    X_reshape_v2 = tf.reshape(X_transpose, [-1, ELEMENT_LEN])
    X_split = tf.split(X_reshape_v2, SEQUENCE_LEN, 0)
    
# A single LSTM cell, statically unrolled over the list of per-step inputs.
with tf.variable_scope("rnn") as scope:
    lstm_cell = rnn.BasicLSTMCell(N_LSTM_CELLS)
    outputs, states = rnn.static_rnn(
        lstm_cell, X_split, dtype=tf.float32)
    
# Linear projection from the LSTM output to N_CLASSES logits.
with tf.variable_scope("output") as scope:
    W = tf.get_variable("weights", shape=[N_LSTM_CELLS, N_CLASSES])
    b = tf.get_variable("biases", shape=[N_CLASSES])
    # outputs is a list of SEQUENCE_LEN tensors, each (BATCH_LEN, N_LSTM_CELLS)
    # outputs[0] is the embedded prediction that the LSTM comes up with
    # after having seen only the 0th element in the sequence, outputs[-1]
    # is the embedded prediction the LSTM comes up with after having
    # seen **all** elements in the sequence - this is the one we want
    # to use for our final prediction
    # y_ holds the unnormalized logits, shape (BATCH_LEN, N_CLASSES)
    y_ = tf.matmul(outputs[-1], W) + b

In [32]:
# inspect the static shape of the LSTM output at the final time step
outputs[-1].shape

TensorShape([Dimension(None), Dimension(150)])

In [25]:
# Loss: softmax cross-entropy between the one-hot labels fed through `y` and
# the logits `y_`, averaged over the batch.
with tf.variable_scope("loss") as scope:
    y = tf.placeholder(tf.float32, shape=[None, N_CLASSES])
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=y, logits=y_)
    loss = tf.reduce_mean(per_example_xent)

# Training op: running `optimizer` applies one Adam update (default
# hyper-parameters) that minimizes `loss`.
with tf.variable_scope("optimizer") as scope:
    adam = tf.train.AdamOptimizer()
    optimizer = adam.minimize(loss)

In [18]:
# Train the MNIST RNN and report the summed batch loss after every epoch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # number of full batches drawn per epoch (a partial remainder is not counted)
    batches_per_epoch = mnist.train.num_examples // TRAINING_BATCH_SIZE
    separator = "=============================================="
    train_fetches = [optimizer, loss]

    for epoch in range(N_TRAINING_EPOCHS):
        # running sum of the per-batch mean losses for this epoch
        epoch_loss = 0

        for _ in range(batches_per_epoch):
            batch_x, batch_y = mnist.train.next_batch(TRAINING_BATCH_SIZE)

            # one optimizer step; only the loss value is kept
            fetched = sess.run(train_fetches,
                               feed_dict={X: batch_x, y: batch_y})
            epoch_loss += fetched[1]

        print(separator)
        print("Epoch " + str(epoch) + " completed.")
        print("Loss: " + str(epoch_loss))
        print(separator)

Epoch 0 completed.
Loss: 633.796434879303
Epoch 1 completed.
Loss: 534.4614440202713
Epoch 2 completed.
Loss: 511.82694256305695
Epoch 3 completed.
Loss: 496.16765052080154
Epoch 4 completed.
Loss: 484.3252323269844
Epoch 5 completed.
Loss: 475.68764013051987
Epoch 6 completed.
Loss: 469.02505922317505
Epoch 7 completed.
Loss: 463.5093591809273
Epoch 8 completed.
Loss: 458.82539570331573
Epoch 9 completed.
Loss: 454.82355189323425


## Worked Example II: Character RNN

In [None]:
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np

In [None]:
# read in the text corpus; the context manager closes the file handle as soon
# as the contents have been read instead of leaving it open
with open("./workshop-DL03/the-last-question.txt", "r") as corpus_file:
    text = corpus_file.read()

In [None]:
# preprocess the text corpus into integers: collect the distinct characters
# and build lookup tables in both directions.
# Every table is derived from `character_set`; sorting it makes the
# character -> integer assignment identical on every run.
character_set = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(character_set)}
int_to_char = {i: ch for i, ch in enumerate(character_set)}
n_characters = len(character_set)

In [None]:
SEQUENCE_LEN = 24 # length of char-sequences fed to RNN
# each character is fed as a single integer id (text_to_integers wraps every
# id in a one-element list); the commented-out n_characters is presumably the
# one-hot alternative
ELEMENT_LEN = 1 #n_characters # length of each element in a char-sequence

# the network emits N_CLASSES values per sequence, here a single scalar that
# is fit with squared error in the loss cell; n_characters would presumably
# go with the commented-out softmax cross-entropy
N_CLASSES = 1 # n_characters

# number of passes over the prepared batches, and sequences per batch
N_TRAINING_EPOCHS = 20
TRAINING_BATCH_SIZE = 128

In [None]:
def text_to_integers(text):
    """
    Encode `text` character by character using `char_to_int`.

    Returns a list holding one single-element list per character, i.e.
    `[[id], [id], ...]`. Raises KeyError for a character that is not a
    key of `char_to_int`.
    """
    return [[char_to_int[symbol]] for symbol in text]

In [None]:
# preprocess the text: encode it, then cut it into non-overlapping windows of
# SEQUENCE_LEN characters; the target of each window is the character that
# immediately follows it
encoding = text_to_integers(text)
window_starts = range(0, len(encoding) - SEQUENCE_LEN, SEQUENCE_LEN)
train_X = [encoding[start:start + SEQUENCE_LEN] for start in window_starts]
train_y = [encoding[start + SEQUENCE_LEN] for start in window_starts]

In [None]:
# show two (input window, target) training pairs as a sanity check
for sample_idx in (1, 2):
    print(train_X[sample_idx], train_y[sample_idx])

In [None]:
def batchify(data, batch_size):
    """
    Cut `data` into consecutive slices of exactly `batch_size` items.

    Only full batches are returned; trailing items that do not fill a
    complete batch are dropped.
    """
    n_full_batches = len(data) // batch_size
    return [
        data[start:start + batch_size]
        for start in range(0, n_full_batches * batch_size, batch_size)
    ]

In [None]:
# group inputs and targets into batches of TRAINING_BATCH_SIZE; both lists are
# cut the same way, so batches_X[i] and batches_y[i] stay aligned
batches_X = batchify(train_X, TRAINING_BATCH_SIZE)
batches_y = batchify(train_y, TRAINING_BATCH_SIZE)

In [None]:
# discard whatever is in the default graph (e.g. the MNIST model built
# earlier in this notebook) before the character model is defined
tf.reset_default_graph()

In [None]:
# define the input placeholder / reshape
with tf.variable_scope("input"):
    # one batch of sequences: (BATCH_LEN, SEQUENCE_LEN, ELEMENT_LEN)
    X = tf.placeholder(tf.float32, shape=[None, SEQUENCE_LEN, ELEMENT_LEN])
    # put the time axis first: (SEQUENCE_LEN, BATCH_LEN, ELEMENT_LEN)
    X_transpose = tf.transpose(X, [1,0,2])
    # flatten to (SEQUENCE_LEN * BATCH_LEN, ELEMENT_LEN); the rows belonging
    # to one time step are contiguous
    X_reshape = tf.reshape(X_transpose, [-1, ELEMENT_LEN])
    # cut into a list of SEQUENCE_LEN tensors of shape (BATCH_LEN, ELEMENT_LEN),
    # one per time step
    X_split = tf.split(X_reshape, SEQUENCE_LEN, axis=0)

In [None]:
# define the network architecture
with tf.variable_scope("rnn"):
    # 128 units; the output layer's weight shape [128, N_CLASSES] has to
    # match this number
    lstm_cell = tf.nn.rnn_cell.LSTMCell(128)
    # statically unrolled over the list of per-step inputs; `output` holds
    # one tensor per time step
    output, state = rnn.static_rnn(
        lstm_cell, X_split, dtype=tf.float32)

In [None]:
# define the output layer: a linear projection of the last LSTM output
# (no placeholder is created here)
with tf.variable_scope("output"):
    # first dimension 128 matches the LSTM cell size used in the "rnn" scope
    W = tf.get_variable("weight", shape=[128, N_CLASSES])
    b = tf.get_variable("biases", shape=[N_CLASSES])
    # output[-1] is the LSTM output after the final element of the sequence
    y_pre_softmax = tf.matmul(output[-1], W) + b
    #y_ = tf.nn.softmax(y_pre_softmax)

In [None]:
# define the loss function
with tf.variable_scope("loss"):
    # targets: one row of N_CLASSES values per sequence
    y = tf.placeholder(tf.float32, shape=[None, N_CLASSES])
    #entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pre_softmax, labels=y)
    # element-wise squared error between target and prediction
    # NOTE(review): with N_CLASSES = 1 the target is the integer id of the
    # next character, so this regresses on ids whose numeric order is
    # arbitrary; the commented-out cross-entropy above is the classification
    # alternative
    msqerr = (y - y_pre_softmax)**2
    # mean over every element of the batch
    loss = tf.reduce_mean(msqerr)

In [None]:
# define the optimizer
with tf.variable_scope("optimizer"):
    # RMSProp with learning rate 0.01; running this op performs one update
    # step that minimizes `loss`
    optimizer = tf.train.RMSPropOptimizer(0.01).minimize(loss)

In [None]:
# Train the character RNN and report the summed batch loss after every epoch.
with tf.Session() as sess:

    # train the network
    sess.run(tf.global_variables_initializer())
    separator = "=============================================="
    train_fetches = [loss, optimizer]

    for epoch in range(1, N_TRAINING_EPOCHS+1):
        # running sum of the per-batch mean losses for this epoch
        epoch_loss = 0.0

        # inputs and targets were batched identically, so they pair up 1:1
        for batch_X, batch_y in zip(batches_X, batches_y):
            fetched = sess.run(train_fetches,
                               feed_dict={X: batch_X, y: batch_y})
            epoch_loss += fetched[0]

        print(separator)
        print("Epoch " + str(epoch) + " completed.")
        print("Loss: " + str(epoch_loss))
        print(separator)