# Showcasing recurrent neural networks in mink

This showcase is adapted from the [Lasagne examples](https://github.com/Lasagne/Lasagne/blob/master/examples/recurrent.py); all credit goes there.

## Imports

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
from mink.layers import RecurrentLayer
from mink.layers import LSTMLayer
from mink.layers import GRULayer
from mink.layers import InputLayer
from mink.layers import DenseLayer
from mink.nonlinearities import Tanh
from mink.updates import RMSProp
from mink import NeuralNetRegressor

## Parameters

In [3]:
# Hyperparameters shared by every network built in this notebook.
LEARNING_RATE = 0.0015
MAX_EPOCHS = 40
# One RMSProp update rule, configured with the learning rate above and passed
# to each regressor via ``update=``.
UPDATE = RMSProp(learning_rate=LEARNING_RATE)

# TensorFlow session configuration with the per-process GPU memory fraction
# set to 0.5; passed to each regressor via ``session_kwargs=``.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
SESSION_KWARGS = {'config': tf.ConfigProto(gpu_options=gpu_options)}

## Generate data

In [4]:
def gen_data(min_length=50, max_length=55, n_batch=1000, seed=None):
    '''
    Generate a batch of sequences for the "add" task, e.g. the target for the
    following

    ``| 0.5 | 0.7 | 0.3 | 0.1 | 0.2 | ... | 0.5 | 0.9 | ... | 0.8 | 0.2 |
      |  0  |  0  |  1  |  0  |  0  |     |  0  |  1  |     |  0  |  0  |``

    would be 0.3 + .9 = 1.2.  This task was proposed in [1]_ and explored in
    e.g. [2]_.

    Both the inputs and the targets are mean-centered before they are
    returned, so the returned values are offsets from the batch mean rather
    than the raw numbers shown above.

    Parameters
    ----------

    min_length : int
        Minimum sequence length (inclusive). Should be at least 2 so that the
        two marked positions are distinct.

    max_length : int
        Maximum sequence length (inclusive). Also the size of the time axis
        of the returned arrays.

    n_batch : int
        Number of samples in the batch.

    seed : int or None
        If given, a private ``np.random.RandomState(seed)`` is used so the
        batch is reproducible. If None (default), NumPy's global random state
        is used, so ``np.random.seed`` still controls the result.

    Returns
    -------

    X : np.ndarray
        Input to the network, of shape (n_batch, max_length, 2), where the last
        dimension corresponds to the two sequences shown above (float32).

    y : np.ndarray
        Correct output for each sample, shape (n_batch,) (float32).

    mask : np.ndarray
        A binary matrix of shape (n_batch, max_length) where ``mask[i, j] = 1``
        when ``j < (length of sequence i)`` and ``mask[i, j] = 0`` otherwise
        (float32).

    Raises
    ------

    ValueError
        Propagated from NumPy when ``min_length > max_length``.

    References
    ----------

    .. [1] Hochreiter, Sepp, and Jürgen Schmidhuber. "Long short-term memory."
    Neural computation 9.8 (1997): 1735-1780.

    .. [2] Sutskever, Ilya, et al. "On the importance of initialization and
    momentum in deep learning." Proceedings of the 30th international
    conference on machine learning (ICML-13). 2013.

    '''
    # Draw from the global NumPy RNG unless an explicit seed was requested.
    rng = np.random if seed is None else np.random.RandomState(seed)
    # Generate X: channel 0 holds the random values, channel 1 (the marker
    # channel) starts as all zeros and is filled in per sample below.
    X = np.concatenate([rng.uniform(size=(n_batch, max_length, 1)),
                        np.zeros((n_batch, max_length, 1))],
                       axis=-1)
    mask = np.zeros((n_batch, max_length))
    y = np.zeros((n_batch,))
    # Compute masks and correct values
    for n in range(n_batch):
        # Randomly choose the sequence length; randint's upper bound is
        # exclusive, hence the + 1 to make max_length reachable.
        length = rng.randint(min_length, max_length + 1)
        # Make the mask for this sample 1 within the range of length
        mask[n, :length] = 1
        # Zero out X after the end of the sequence
        X[n, length:, 0] = 0
        # Mark one position in the first tenth of the sequence. Integer
        # division keeps the bound an int; max(1, ...) keeps it valid for
        # sequences shorter than 10 steps.
        X[n, rng.randint(max(1, length // 10)), 1] = 1
        # Mark one position in the second half of the sequence.
        X[n, rng.randint(length // 2, length), 1] = 1
        # Multiply and sum the dimensions of X to get the target value
        y[n] = np.sum(X[n, :, 0] * X[n, :, 1])
    # Center the inputs (per channel, over all samples and steps) and outputs
    X -= X.reshape(-1, 2).mean(axis=0)
    y -= y.mean()
    return (
        X.astype(np.float32),
        y.astype(np.float32),
        mask.astype(np.float32))

In [5]:
# Seed NumPy's global RNG so the generated data set is identical on every
# Restart & Run All (gen_data draws from np.random). The value 42 is arbitrary.
np.random.seed(42)
# The mask returned as the third element is not used in this notebook.
X, y, _ = gen_data()

In [6]:
# Sanity check: X should be (n_batch, max_length, 2) and y should be (n_batch,).
X.shape, y.shape

((1000, 55, 2), (1000,))

## Basic recurrent layer

Here we use the vanilla mink recurrent layer. You can pass it any cell from `tensorflow.nn.rnn_cell`. By default, it uses `tensorflow.nn.rnn_cell.BasicRNNCell` with 100 units.

In [7]:
# Layer stack: input -> recurrent layer (no cell given, so mink's default is
# used) -> dense output layer with a tanh nonlinearity.
l0 = InputLayer()
l1 = RecurrentLayer(l0)
l2 = DenseLayer(l1, nonlinearity=Tanh())

In [8]:
# Wrap the output layer in a regressor that uses the shared update rule,
# epoch budget and TensorFlow session settings from the parameters cell.
net = NeuralNetRegressor(
    l2,
    update=UPDATE,
    max_epochs=MAX_EPOCHS,
    verbose=1,
    session_kwargs=SESSION_KWARGS)

In [9]:
# Train the basic recurrent network on the generated sequences and targets.
net.fit(X, y)

# Neural Network with 5501 learnable parameters

## Layer information

|   # | name      | size   |
|----:|:----------|:-------|
|   0 | input     | 55x2   |
|   1 | recurrent | 55x100 |
|   2 | dense     | 1      |

|   epoch |   train loss |     dur |
|--------:|-------------:|--------:|
|       1 |      [36m0.18565[0m | 0.16853 |
|       2 |      [36m0.17970[0m | 0.09502 |
|       3 |      [36m0.17496[0m | 0.09938 |
|       4 |      [36m0.17092[0m | 0.09245 |
|       5 |      [36m0.16716[0m | 0.08941 |
|       6 |      [36m0.16376[0m | 0.09735 |
|       7 |      [36m0.16062[0m | 0.08589 |
|       8 |      [36m0.16005[0m | 0.10094 |
|       9 |      0.16674 | 0.09322 |
|      10 |      0.16366 | 0.09473 |
|      11 |      [36m0.15644[0m | 0.09227 |
|      12 |      0.15760 | 0.09193 |
|      13 |      0.15819 | 0.09398 |
|      14 |      0.15886 | 0.08454 |
|      15 |      0.15832 | 0.08232 |
|      16 |      0.15726 | 0.08955 |
|      17 |      [36m0.15576[0m | 

NeuralNetRegressor(batch_iterator_test=128, batch_iterator_train=128,
          encoder=None,
          layer=DenseLayer(W=GlorotUniform(c01b=False, gain=1.0), b=Constant(value=0.0),
      incoming=RecurrentLayer(cell=None,
        incoming=InputLayer(Xs=None, make_logs=False, name=None, ys=None),
        make_logs=False, name=None, sequence_length=None),
      make_logs=False, name=None, nonlinearity=Tanh(), num_units=1),
          max_epochs=40, objective=MeanSquaredError(),
          on_epoch_finished=(PrintTrainProgress(first_iteration=False, floatfmt='.5f', tablefmt='pipe'),),
          on_training_started=(PrintLayerInfo(tablefmt='pipe'),),
          session_kwargs={'config': gpu_options {
  per_process_gpu_memory_fraction: 0.5
}
},
          update=RMSProp(decay=0.9, learning_rate=0.0015, momentum=0.0),
          verbose=1)

## Passing an `LSTMCell` to the basic `RecurrentLayer`

Here we show how you can pass an `LSTMCell` to the `RecurrentLayer`.

In [10]:
# Build a TensorFlow LSTM cell by hand: 100 units, peepholes enabled,
# tuple-style state and cell_clip set to 100. It is handed to RecurrentLayer
# in the next cell.
cell = tf.nn.rnn_cell.LSTMCell(
    num_units=100,
    use_peepholes=True,
    state_is_tuple=True,
    cell_clip=100,
)

In [11]:
# Same stack as before, but the recurrent layer now wraps the LSTM cell
# constructed above instead of the default cell.
l0 = InputLayer()
l1 = RecurrentLayer(l0, cell=cell)
l2 = DenseLayer(l1, nonlinearity=Tanh())

In [12]:
# Regressor for the LSTM-cell network, configured exactly like the first one.
net = NeuralNetRegressor(
    l2,
    update=UPDATE,
    max_epochs=MAX_EPOCHS,
    verbose=1,
    session_kwargs=SESSION_KWARGS)

In [13]:
# Train the LSTM-cell network.
# NOTE(review): epochs=50 differs from MAX_EPOCHS = 40 given to the
# constructor, and the first network was fitted without this argument —
# confirm which value mink uses and whether the difference is intended.
net.fit(X, y, epochs=50)

# Neural Network with 5501 learnable parameters

## Layer information

|   # | name      | size   |
|----:|:----------|:-------|
|   0 | input     | 55x2   |
|   1 | recurrent | 55x100 |
|   2 | dense     | 1      |

|   epoch |   train loss |     dur |
|--------:|-------------:|--------:|
|       1 |      [36m0.17640[0m | 0.37055 |
|       2 |      [36m0.17581[0m | 0.32639 |
|       3 |      [36m0.17504[0m | 0.32380 |
|       4 |      [36m0.17403[0m | 0.32425 |
|       5 |      [36m0.17270[0m | 0.31516 |
|       6 |      [36m0.17097[0m | 0.32826 |
|       7 |      [36m0.16875[0m | 0.32109 |
|       8 |      [36m0.16600[0m | 0.31773 |
|       9 |      [36m0.16309[0m | 0.31548 |
|      10 |      [36m0.16021[0m | 0.31493 |
|      11 |      [36m0.15758[0m | 0.31299 |
|      12 |      [36m0.15543[0m | 0.31409 |
|      13 |      [36m0.15362[0m | 0.31559 |
|      14 |      [36m0.15265[0m | 0.34484 |
|      15 |      [36m0.15199[0m | 0.31772 |
|      16 |      0.

NeuralNetRegressor(batch_iterator_test=128, batch_iterator_train=128,
          encoder=None,
          layer=DenseLayer(W=GlorotUniform(c01b=False, gain=1.0), b=Constant(value=0.0),
      incoming=RecurrentLayer(cell=<tensorflow.python.ops.rnn_cell.LSTMCell object at 0x7f4b8af735c0>,
        incoming=InputLayer(Xs=None, make_logs=False, name=None, ys=None),
        make_logs=False, name=None, sequence_length=None),
      make_logs=False, name=None, nonlinearity=Tanh(), num_units=1),
          max_epochs=40, objective=MeanSquaredError(),
          on_epoch_finished=(PrintTrainProgress(first_iteration=False, floatfmt='.5f', tablefmt='pipe'),),
          on_training_started=(PrintLayerInfo(tablefmt='pipe'),),
          session_kwargs={'config': gpu_options {
  per_process_gpu_memory_fraction: 0.5
}
},
          update=RMSProp(decay=0.9, learning_rate=0.0015, momentum=0.0),
          verbose=1)

## LSTM layer

Instead of passing an `LSTMCell` to the `RecurrentLayer`, you can use mink's `LSTMLayer` directly. Under the hood, both approaches amount to the same thing.

In [14]:
# Reset TensorFlow's default graph before building the next network.
# NOTE(review): presumably this avoids clashes with the graph elements
# created for the previous models — confirm; the GRU section does not do it.
tf.reset_default_graph()

In [15]:
# Same stack again, this time using mink's dedicated LSTMLayer with the same
# peephole and cell_clip settings as the hand-built LSTMCell above.
l0 = InputLayer()
l1 = LSTMLayer(l0, use_peepholes=True, cell_clip=100)
l2 = DenseLayer(l1, nonlinearity=Tanh())

In [16]:
# Regressor for the LSTMLayer network, configured like the previous ones.
net = NeuralNetRegressor(
    l2,
    update=UPDATE,
    max_epochs=MAX_EPOCHS,
    verbose=1,
    session_kwargs=SESSION_KWARGS)

In [17]:
# Train the LSTMLayer network.
# NOTE(review): epochs=50 differs from MAX_EPOCHS = 40 given to the
# constructor — confirm which value mink uses.
net.fit(X, y, epochs=50)

# Neural Network with 5501 learnable parameters

## Layer information

|   # | name   | size   |
|----:|:-------|:-------|
|   0 | input  | 55x2   |
|   1 | lstm   | 55x100 |
|   2 | dense  | 1      |

|   epoch |   train loss |     dur |
|--------:|-------------:|--------:|
|       1 |      [36m0.17374[0m | 0.33540 |
|       2 |      [36m0.17351[0m | 0.31706 |
|       3 |      [36m0.17317[0m | 0.31294 |
|       4 |      [36m0.17267[0m | 0.31776 |
|       5 |      [36m0.17197[0m | 0.31788 |
|       6 |      [36m0.17099[0m | 0.31407 |
|       7 |      [36m0.16974[0m | 0.31321 |
|       8 |      [36m0.16830[0m | 0.31218 |
|       9 |      [36m0.16644[0m | 0.32088 |
|      10 |      [36m0.16373[0m | 0.31720 |
|      11 |      [36m0.16044[0m | 0.31294 |
|      12 |      [36m0.15711[0m | 0.31125 |
|      13 |      [36m0.15631[0m | 0.31411 |
|      14 |      [36m0.15391[0m | 0.31486 |
|      15 |      [36m0.15093[0m | 0.30924 |
|      16 |      0.15225 | 0.31204

NeuralNetRegressor(batch_iterator_test=128, batch_iterator_train=128,
          encoder=None,
          layer=DenseLayer(W=GlorotUniform(c01b=False, gain=1.0), b=Constant(value=0.0),
      incoming=LSTMLayer(cell_clip=100,
     incoming=InputLayer(Xs=None, make_logs=False, name=None, ys=None),
     make_logs=False, name=None, nonlinearity=Tanh(), num_units=100,
     sequence_length=None, use_peepholes=True),
      make_logs=False, name=None, nonlinearity=Tanh(), num_units=1),
          max_epochs=40, objective=MeanSquaredError(),
          on_epoch_finished=(PrintTrainProgress(first_iteration=False, floatfmt='.5f', tablefmt='pipe'),),
          on_training_started=(PrintLayerInfo(tablefmt='pipe'),),
          session_kwargs={'config': gpu_options {
  per_process_gpu_memory_fraction: 0.5
}
},
          update=RMSProp(decay=0.9, learning_rate=0.0015, momentum=0.0),
          verbose=1)

## GRU layer

Finally, mink also supports a layer that uses the Gated Recurrent Unit (GRU) cell from TensorFlow.

In [18]:
# Same stack once more, with mink's GRULayer (default settings) as the
# recurrent component.
l0 = InputLayer()
l1 = GRULayer(l0)
l2 = DenseLayer(l1, nonlinearity=Tanh())

In [19]:
# Regressor for the GRU network, configured like the previous ones.
net = NeuralNetRegressor(
    l2,
    update=UPDATE,
    max_epochs=MAX_EPOCHS,
    verbose=1,
    session_kwargs=SESSION_KWARGS)

In [20]:
# Train the GRU network.
# NOTE(review): epochs=50 differs from MAX_EPOCHS = 40 given to the
# constructor — confirm which value mink uses.
net.fit(X, y, epochs=50)

# Neural Network with 5501 learnable parameters

## Layer information

|   # | name   | size   |
|----:|:-------|:-------|
|   0 | input  | 55x2   |
|   1 | gru    | 55x100 |
|   2 | dense  | 1      |

|   epoch |   train loss |     dur |
|--------:|-------------:|--------:|
|       1 |      [36m0.17357[0m | 0.25589 |
|       2 |      [36m0.17340[0m | 0.24200 |
|       3 |      [36m0.17315[0m | 0.24157 |
|       4 |      [36m0.17277[0m | 0.24604 |
|       5 |      [36m0.17222[0m | 0.23385 |
|       6 |      [36m0.17141[0m | 0.24360 |
|       7 |      [36m0.17025[0m | 0.24318 |
|       8 |      [36m0.16864[0m | 0.23332 |
|       9 |      [36m0.16648[0m | 0.22514 |
|      10 |      [36m0.16376[0m | 0.22569 |
|      11 |      [36m0.16053[0m | 0.23903 |
|      12 |      [36m0.15703[0m | 0.24755 |
|      13 |      [36m0.15374[0m | 0.24099 |
|      14 |      [36m0.15126[0m | 0.24317 |
|      15 |      [36m0.15013[0m | 0.23944 |
|      16 |      [36m0.14979[0m 

NeuralNetRegressor(batch_iterator_test=128, batch_iterator_train=128,
          encoder=None,
          layer=DenseLayer(W=GlorotUniform(c01b=False, gain=1.0), b=Constant(value=0.0),
      incoming=GRULayer(incoming=InputLayer(Xs=None, make_logs=False, name=None, ys=None),
     make_logs=False, name=None, nonlinearity=Tanh(), num_units=100,
     sequence_length=None),
      make_logs=False, name=None, nonlinearity=Tanh(), num_units=1),
          max_epochs=40, objective=MeanSquaredError(),
          on_epoch_finished=(PrintTrainProgress(first_iteration=False, floatfmt='.5f', tablefmt='pipe'),),
          on_training_started=(PrintLayerInfo(tablefmt='pipe'),),
          session_kwargs={'config': gpu_options {
  per_process_gpu_memory_fraction: 0.5
}
},
          update=RMSProp(decay=0.9, learning_rate=0.0015, momentum=0.0),
          verbose=1)