In [None]:
import keras
import os, sys
# Add the parent directory to the module search path (once), presumably so
# that the KerasTools module imported below can be found there.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import KerasTools as KT
import numpy as np

import tensorflow as tf
# Restrict TensorFlow's logger to messages of severity ERROR.
tf.logging.set_verbosity(tf.logging.ERROR)

In [None]:
def adding_problem_generator(N, seq_len=6, high=1, seed=None):
    """Generate a dataset for the adding problem.

    The data definition follows Quoc V. Le, Navdeep Jaitly and Geoffrey E.
    Hinton, "A Simple Way to Initialize Recurrent Networks of Rectified
    Linear Units".

    Every sample is a sequence of ``seq_len`` time steps with two features
    per step: a random number and a binary mask. The mask is all zeros
    except at two distinct positions, chosen uniformly at random, which are
    set to one. The label is the sum of the two numbers at the marked
    positions. For example:

     input          label
     -----          -----
    1 4 5 3  ----->   9 (4 + 5)
    0 1 1 0

    :param N: the number of samples to generate.
    :param seq_len: the length of a single sequence; at least 2 is needed to
        pick two distinct positions.
    :param high: the random numbers are drawn uniformly between 0 and high.
    :param seed: optional seed for reproducible output. When None (the
        default) the global ``np.random`` state is used.
    :return: (X, Y) where X has shape (N, seq_len, 2) -- feature 0 holds the
        numbers, feature 1 the mask -- and Y has shape (N, 1).
    :raises ValueError: propagated from the position sampling when two
        distinct positions cannot be drawn (seq_len < 2).
    """
    # A private RandomState keeps seeded runs independent of the global
    # generator; without a seed we fall back to the module-level functions.
    rng = np.random if seed is None else np.random.RandomState(seed)

    X_num = rng.uniform(low=0, high=high, size=(N, seq_len, 1))
    X_mask = np.zeros((N, seq_len, 1))
    Y = np.zeros((N, 1))  # every entry is overwritten in the loop below
    for i in range(N):
        # Two distinct positions, uniformly distributed over the sequence.
        positions = rng.choice(seq_len, size=2, replace=False)
        X_mask[i, positions, 0] = 1
        Y[i, 0] = X_num[i, positions, 0].sum()

    # Stack numbers and mask along the feature axis.
    X = np.concatenate((X_num, X_mask), axis=2)
    return X, Y

In [None]:
# Experiment settings; seqlen and epochs are read again by the evaluation cell.
seqlen = 64
epochs = 200
batch_size = 100

# 1000 training sequences, each seqlen steps long with 2 features per step.
x_train, y_train = adding_problem_generator(1000, seq_len=seqlen)

# A single 8-unit GRU followed by a linear read-out of the predicted sum.
model = keras.models.Sequential([
    keras.layers.GRU(8, input_shape=(seqlen, 2)),
    keras.layers.Dense(1, activation='linear'),
])
model.compile(optimizer='rmsprop', loss='mse')
model.summary()

# 20% of the training data is held out for validation during fitting.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

In [None]:
# Score the trained model on 500 freshly generated sequences.
x_test, y_test = adding_problem_generator(500, seq_len=seqlen)

# Attach the extra entries to the training history before plotting.
# NOTE(review): 0.1767 looks like the MSE quoted in the Le et al. paper for
# always predicting 1 -- confirm.
history.history.update({
    'test_loss': model.evaluate(x_test, y_test),
    'epochs': epochs,
    'baseline': 0.1767,
})
KT.plot_history(history.history)