# Many to One RNN with Variable Sequence Length:

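In this tutorial we implement a many-to-one recurrent neural network (RNN) in TensorFlow that handles input sequences of variable length. The network reads a zero-padded sequence of digits together with its true length and predicts the sum of the digits.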

<img src="files/files/06.png">


*Fig1. Unfolded representation of the implemented RNN structure*


## 0. Import the required libraries:
We will start with importing the required libraries to our Python environment.

In [36]:
# imports
import tensorflow as tf
import numpy as np

In [37]:

# Start from an empty default graph so that re-running the notebook does not
# collide with variables created by an earlier run.
tf.compat.v1.reset_default_graph()
# The cells below build a graph with placeholders and run it in a Session,
# which requires eager execution to be turned off.
tf.compat.v1.disable_eager_execution()

## 1. Generate some data

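For this tutorial we generate a synthetic dataset. Each sample is a sequence of random integers between 0 and 9 whose true length varies between 1 and `seq_max_len`; shorter sequences are padded with zeros, and the target is the sum of the elements of the sequence.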

### 1.1. Data dimension
Here, we specify the dimensions of the data samples which will be used in the code. Defining these variables makes it easier to modify them later (compared with using hard-coded numbers throughout the code). Ideally these would be inferred from the data that has been read, but here we just write the numbers.

In [38]:
# Data Dimensions

# Number of features at each time step of an input sequence
input_dim = 1
# Length every input sequence is padded to (maximum sequence length)
seq_max_len = 4
# Number of values the network predicts per sequence
out_dim = 1

### 1.2. Generate data and display the sizes
Now we define a helper function that generates the zero-padded input sequences, their targets (the sum of each sequence) and their true lengths. We then use it to create the training and test sets and display their sizes:

In [39]:
def generate_data(count=1000, max_length=4, dim=1):
    """
    Build a random, zero-padded dataset whose target is the per-sequence sum.

    :param count: number of sequences to generate
    :param max_length: number of time steps every sequence is padded to
    :param dim: number of features per time step
    :return: tuple (x, y, length) where
        x is an integer array of shape (count, max_length, dim) holding
        values in [0, 9] with zeros after each sequence's true length,
        y is the sum of x over the time axis, shape (count, dim), and
        length holds the true lengths in [1, max_length], shape (count,)
    """
    x = np.random.randint(0, 10, size=(count, max_length, dim))
    length = np.random.randint(1, max_length + 1, count)

    # Flag every time step at or beyond a sequence's true length as padding
    # and zero all of them in one assignment.
    time_steps = np.arange(max_length)
    is_padding = time_steps[np.newaxis, :] >= length[:, np.newaxis]
    x[is_padding] = 0

    y = x.sum(axis=1)
    return x, y, length

In [40]:
# Build the training and test sets with the dimensions configured above
x_train, y_train, seq_len_train = generate_data(1000, seq_max_len, input_dim)
x_test, y_test, seq_len_test = generate_data(5, seq_max_len, input_dim)

# Report how many samples each split holds
print("Size of:")
print("- Training-set:\t\t{}".format(y_train.shape[0]))
print("- Test-set:\t{}".format(y_test.shape[0]))

Size of:
- Training-set:		1000
- Test-set:	5


To get batches of samples:

In [41]:
def next_batch(x, y, seq_len, batch_size):
    """
    Draw a random batch (without replacement) from the dataset.

    :param x: input array whose first axis indexes the samples
    :param y: target array aligned with x along the first axis
    :param seq_len: array of sequence lengths aligned with x
    :param batch_size: number of samples to draw; if it exceeds the number of
        samples, every sample is returned once in shuffled order
    :return: tuple (x_batch, y_batch, seq_len_batch) taken at the same
        randomly chosen sample indices
    """
    num_samples = x.shape[0]
    # Shuffle all sample indices, then keep the first batch_size of them
    chosen = np.random.permutation(num_samples)[:batch_size]
    return tuple(arr[chosen] for arr in (x, y, seq_len))

## 2. Hyperparameters

In [42]:
# Parameters
# NOTE: the next cell assigns learning_rate, batch_size and display_freq again,
# so when the notebook is run top to bottom only training_steps keeps the
# value set here.

# Initial learning rate of the optimizer
learning_rate = 0.01
# Total number of training steps
training_steps = 10000
# Number of samples per training batch
batch_size = 10
# The training loss is displayed once every display_freq steps
display_freq = 1000

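## 2. Hyperparameters (continued)

Note: this cell assigns `learning_rate`, `batch_size` and `display_freq` again, replacing the values set in the previous cell.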

In [43]:
# NOTE: learning_rate, batch_size and display_freq were already assigned in
# the previous cell; the values below replace them.

# Initial learning rate of the optimizer
learning_rate = 0.001
# Total number of training epochs. The training loop in this notebook iterates
# over training_steps and does not read this value.
epochs = 10
# Number of samples per training batch
batch_size = 100
# The training loss is displayed once every display_freq steps
display_freq = 100

## 3. Network configuration

In [44]:
# Size of the RNN cell's hidden state (number of hidden units)
num_hidden_units = 10

## 4. Create network helper functions 
### 4.1. Helper functions for creating new variables

In [45]:
# weight and bias wrappers
def weight_variable(shape, name='W'):
    """
    Create a weight variable initialized from a truncated normal distribution
    with standard deviation 0.01.

    :param shape: weight shape
    :param name: name passed to get_variable (defaults to 'W', the name that
        was previously hard-coded); pass a different name to create more than
        one weight variable in the same variable scope
    :return: float32 weight variable obtained through get_variable
    """
    initer = tf.compat.v1.truncated_normal_initializer(stddev=0.01)
    return tf.compat.v1.get_variable(name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)


def bias_variable(shape, name='b'):
    """
    Create a bias variable initialized to zero.

    :param shape: bias variable shape
    :param name: name passed to get_variable (defaults to 'b', the name that
        was previously hard-coded); pass a different name to create more than
        one bias variable in the same variable scope
    :return: float32 bias variable obtained through get_variable
    """
    # The initial value is a constant tensor that already carries the shape,
    # so no separate shape argument is passed to get_variable.
    initial = tf.compat.v1.constant(0., shape=shape, dtype=tf.compat.v1.float32)
    return tf.compat.v1.get_variable(name,
                           dtype=tf.compat.v1.float32,
                           initializer=initial)

### 4.2. Helper-function for creating a RNN

In [46]:
def RNN(x, weights, biases, n_hidden, seq_max_len, seq_len):
    """
    Run a basic RNN over padded sequences and apply a linear output layer to
    the RNN output at each sequence's last valid time step.

    :param x: inputs of shape [batch_size, max_time, input_dim]
    :param weights: matrix of fully-connected output layer weights
    :param biases: vector of fully-connected output layer biases
    :param n_hidden: number of hidden units
    :param seq_max_len: sequence maximum length
    :param seq_len: length of each sequence of shape [batch_size,]
    :return: matmul(last_outputs, weights) + biases, one row per sequence
    """
    v1 = tf.compat.v1
    rnn_cell = v1.nn.rnn_cell.BasicRNNCell(n_hidden)
    # Only the per-step outputs are needed; the final state is discarded.
    all_outputs, _ = v1.nn.dynamic_rnn(rnn_cell, x, sequence_length=seq_len, dtype=v1.float32)

    # After flattening the outputs to [batch * time, n_hidden], sample k's
    # time step t sits at row k * seq_max_len + t, so its last valid step
    # (t = seq_len[k] - 1) can be fetched with a single gather.
    n_samples = v1.shape(all_outputs)[0]
    last_step_rows = v1.range(0, n_samples) * seq_max_len + (seq_len - 1)
    flat_outputs = v1.reshape(all_outputs, [-1, n_hidden])
    last_outputs = v1.gather(flat_outputs, last_step_rows)

    return v1.matmul(last_outputs, weights) + biases

## 5. Create the network graph
### 5.1. Placeholders for the inputs (x), sequence length (seqLen), and corresponding labels (y)

In [47]:
# Placeholders for inputs (x), input sequence lengths (seqLen) and targets (y).
# The leading None leaves the batch size unspecified so any number of samples
# can be fed.
x = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, seq_max_len, input_dim])
seqLen = tf.compat.v1.placeholder(tf.compat.v1.int32, [None])
# The target width follows out_dim (instead of a hard-coded 1) so it always
# matches the output layer built from the same setting.
y = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, out_dim])

### 5.2. Define the network

In [48]:
# Output-layer weight matrix of shape [num_hidden_units, out_dim]; weight_variable
# draws it from a truncated normal distribution with standard deviation 0.01
W = weight_variable(shape=[num_hidden_units, out_dim])

# Output-layer bias vector of shape [out_dim], initialized to zero
b = bias_variable(shape=[out_dim])

# Network predictions: the RNN output at each sequence's last valid time step,
# passed through the linear output layer (W, b)
pred_out = RNN(x, W, b, num_hidden_units, seq_max_len, seqLen)

### 5.3. Define the loss function and optimizer

In [49]:
# Loss: mean of the squared difference between predictions and targets
# (mean-squared error)
cost = tf.compat.v1.reduce_mean(tf.compat.v1.square(pred_out - y))
# Training op: running it performs one Adam update that reduces `cost`
train_op = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

### 5.4. Initialize all variables

In [50]:
# Op that initializes all global variables. It is created after the optimizer
# so that any variables the optimizer added to the graph are included as well.
init = tf.compat.v1.global_variables_initializer()

## 6. Train

In [51]:
# Train in a single session: initialize the variables once, then run one
# optimizer update per step on a freshly sampled random batch.
with tf.compat.v1.Session() as sess:
    sess.run(init)
    print('----------Training---------')
    for step in range(training_steps):
        x_batch, y_batch, seq_len_batch = next_batch(x_train, y_train, seq_len_train, batch_size)
        feed = {x: x_batch, y: y_batch, seqLen: seq_len_batch}
        # Fetch the loss together with the update so it can be reported
        _, mse = sess.run([train_op, cost], feed_dict=feed)
        if step % display_freq != 0:
            continue
        print('Step {0:<6}, MSE={1:.4f}'.format(step, mse))

----------Training---------
Step 0     , MSE=172.5014
Step 100   , MSE=152.4450
Step 200   , MSE=112.5536
Step 300   , MSE=112.1071
Step 400   , MSE=87.0684
Step 500   , MSE=72.2100
Step 600   , MSE=77.0473
Step 700   , MSE=63.6623
Step 800   , MSE=54.0221
Step 900   , MSE=49.9522
Step 1000  , MSE=40.4604
Step 1100  , MSE=42.6400
Step 1200  , MSE=41.7146
Step 1300  , MSE=33.6827
Step 1400  , MSE=22.9148
Step 1500  , MSE=21.6448
Step 1600  , MSE=24.2795
Step 1700  , MSE=15.5190
Step 1800  , MSE=19.2789
Step 1900  , MSE=14.1949
Step 2000  , MSE=10.7972
Step 2100  , MSE=9.4697
Step 2200  , MSE=9.2722
Step 2300  , MSE=9.7767
Step 2400  , MSE=4.4663
Step 2500  , MSE=2.9950
Step 2600  , MSE=5.3773
Step 2700  , MSE=7.6283
Step 2800  , MSE=3.3528
Step 2900  , MSE=3.7180
Step 3000  , MSE=3.6196
Step 3100  , MSE=6.1622
Step 3200  , MSE=3.6682
Step 3300  , MSE=3.7539
Step 3400  , MSE=1.0630
Step 3500  , MSE=5.2369
Step 3600  , MSE=1.6170
Step 3700  , MSE=1.0651
Step 3800  , MSE=1.5865
Step 3900  

## 7. Test
### 7.1. Evaluate the model on the test set

In [52]:
# Run the network on the test set and print each prediction next to its target.
with tf.compat.v1.Session() as sess:
    # A new session holds no variable values, so the variables must be
    # initialized before pred_out can be evaluated.
    # NOTE(review): this sets every variable back to its initial value. The
    # weights learned during training lived in the training session, which was
    # closed when its `with` block ended, so the predictions below come from
    # an untrained network (the recorded outputs are all close to 0). To
    # evaluate the trained model, run this evaluation inside the training
    # session or save and restore the variables with tf.compat.v1.train.Saver.
    sess.run(init)
    # Test
    y_pred = sess.run(pred_out, feed_dict={x: x_test, seqLen: seq_len_test})
    print('--------Test Results-------')
    # The loop names deliberately avoid `x`: rebinding it here would replace
    # the input placeholder and break any later cell that feeds `x`.
    for truth, guess in zip(y_test, y_pred):
        print("When the ground truth output is {}, the model thinks it is {}"
              .format(truth, guess))


--------Test Results-------
When the ground truth output is [17], the model thinks it is [0.00480353]
When the ground truth output is [1], the model thinks it is [0.00175828]
When the ground truth output is [18], the model thinks it is [0.0193611]
When the ground truth output is [24], the model thinks it is [0.0197016]
When the ground truth output is [16], the model thinks it is [0.01386129]
