https://machinelearningmastery.com/learn-add-numbers-seq2seq-recurrent-neural-networks/

In [1]:
from random import seed
from random import randint
from numpy import array
from keras.models import Sequential
from keras.layers import Dense, TimeDistributed, RepeatVector, Flatten
from keras.layers import LSTM
from math import sqrt
from sklearn.metrics import mean_squared_error
from numpy import argmax

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
seed(1)
X, y = [], []
for i in range(100):
    # two addends drawn from [1, 100]; the target is their sum
    in_pattern = [randint(1, 100), randint(1, 100)]
    out_pattern = sum(in_pattern)
    X.append(in_pattern)
    y.append(out_pattern)
    # echo the first five samples as a sanity check
    if i < 5:
        print(in_pattern, out_pattern)

[18, 73] 91
[98, 9] 107
[33, 16] 49
[64, 98] 162
[58, 61] 119


In [2]:
# generate examples of random integers and their sum
def random_sum_pairs(n_examples, n_numbers, largest):
    """Draw random addends and their sums, scaled by the largest possible sum.

    Each of the n_examples rows holds n_numbers integers drawn with
    randint(1, largest); the target for a row is the sum of its values.
    Inputs and targets are both divided by largest * n_numbers.

    Returns a tuple (X, y) of float NumPy arrays.
    """
    addends, totals = [], []
    for _ in range(n_examples):
        row = [randint(1, largest) for _ in range(n_numbers)]
        addends.append(row)
        totals.append(sum(row))
    # inputs and targets share one scale factor: the largest possible sum
    scale = float(largest * n_numbers)
    X = array(addends).astype('float') / scale
    y = array(totals).astype('float') / scale
    return X, y

In [3]:
# invert normalization
def invert(value, n_numbers, largest):
    """Scale a normalized value back up and round it to the nearest integer.

    The scale factor is largest * n_numbers, the same divisor used when the
    data was normalized.
    """
    scale = float(largest * n_numbers)
    return round(value * scale)

#### Model

In [4]:
# problem configuration
n_examples = 100  # samples generated per call to random_sum_pairs
n_numbers = 2  # addends per sample
largest = 100  # inclusive upper bound for each addend (randint(1, largest))
# define LSTM configuration
n_batch = 1  # passed as batch_size to fit() and predict()
n_epoch = 100  # number of training iterations, each on freshly generated data

In [10]:
# stacked LSTM regressor: the first LSTM returns the full sequence so the
# second LSTM can consume it; a single-unit Dense layer emits the sum
model = Sequential([
    LSTM(6, return_sequences=True),
    LSTM(6),
    Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_error')

In [12]:
# train LSTM
# A fresh random dataset is generated on every iteration and fitted for
# exactly one epoch, so n_epoch controls the number of generated datasets.
for _ in range(n_epoch):
    X, y = random_sum_pairs(n_examples, n_numbers, largest)
    X = X.reshape(n_examples, n_numbers, 1)  # (samples, time steps, 1 feature)
    model.fit(X, y, epochs=1, batch_size=n_batch, verbose=2)

Epoch 1/1
 - 2s - loss: 0.1705
Epoch 1/1
 - 1s - loss: 0.0491
Epoch 1/1
 - 1s - loss: 0.0309
Epoch 1/1
 - 1s - loss: 0.0238
Epoch 1/1
 - 1s - loss: 0.0301
Epoch 1/1
 - 1s - loss: 0.0187
Epoch 1/1
 - 1s - loss: 0.0157
Epoch 1/1
 - 1s - loss: 0.0132
Epoch 1/1
 - 1s - loss: 0.0090
Epoch 1/1
 - 1s - loss: 0.0073
Epoch 1/1
 - 1s - loss: 0.0066
Epoch 1/1
 - 1s - loss: 0.0046
Epoch 1/1
 - 1s - loss: 0.0057
Epoch 1/1
 - 1s - loss: 0.0033
Epoch 1/1
 - 1s - loss: 0.0034
Epoch 1/1
 - 1s - loss: 0.0031
Epoch 1/1
 - 1s - loss: 0.0035
Epoch 1/1
 - 1s - loss: 0.0030
Epoch 1/1
 - 1s - loss: 0.0024
Epoch 1/1
 - 1s - loss: 0.0022
Epoch 1/1
 - 1s - loss: 0.0021
Epoch 1/1
 - 1s - loss: 0.0019
Epoch 1/1
 - 1s - loss: 0.0016
Epoch 1/1
 - 1s - loss: 0.0013
Epoch 1/1
 - 1s - loss: 0.0010
Epoch 1/1
 - 1s - loss: 8.2899e-04
Epoch 1/1
 - 1s - loss: 9.1822e-04
Epoch 1/1
 - 1s - loss: 7.7952e-04
Epoch 1/1
 - 1s - loss: 5.6560e-04
Epoch 1/1
 - 1s - loss: 7.1647e-04
Epoch 1/1
 - 1s - loss: 4.7454e-04
Epoch 1/1
 - 1s

In [15]:
# evaluate on some new patterns
X, y = random_sum_pairs(n_examples, n_numbers, largest)
X = X.reshape(n_examples, n_numbers, 1)
result = model.predict(X, batch_size=n_batch, verbose=0)
# map normalized targets and predictions back to integer sums, then score them
expected = [invert(x, n_numbers, largest) for x in y]
predicted = [invert(x, n_numbers, largest) for x in result[:,0]]
rmse = sqrt(mean_squared_error(expected, predicted))
print('RMSE: %f' % rmse)
# show some examples: at most 20, fewer when the evaluation set is smaller
# (a fixed range(20) would raise IndexError for n_examples < 20)
for i in range(min(20, len(expected))):
    error = expected[i] - predicted[i]
    print('Expected=%d, Predicted=%d (err=%d)' % (expected[i], predicted[i], error))

RMSE: 0.806226
Expected=140, Predicted=140 (err=0)
Expected=121, Predicted=120 (err=1)
Expected=47, Predicted=46 (err=1)
Expected=98, Predicted=98 (err=0)
Expected=69, Predicted=70 (err=-1)
Expected=105, Predicted=104 (err=1)
Expected=134, Predicted=135 (err=-1)
Expected=67, Predicted=66 (err=1)
Expected=141, Predicted=141 (err=0)
Expected=69, Predicted=69 (err=0)
Expected=27, Predicted=27 (err=0)
Expected=92, Predicted=92 (err=0)
Expected=101, Predicted=101 (err=0)
Expected=192, Predicted=190 (err=2)
Expected=197, Predicted=195 (err=2)
Expected=72, Predicted=71 (err=1)
Expected=47, Predicted=46 (err=1)
Expected=144, Predicted=143 (err=1)
Expected=147, Predicted=146 (err=1)
Expected=87, Predicted=87 (err=0)


That is, the order of the input no longer matters. We could shuffle it up any way we want and still learn the problem.

**MLPs are designed to learn mapping functions** and can easily nail the problem of learning how to add numbers.

### Frame as sequence
We can frame addition as a mapping from an input string of characters (e.g. `' 3+10'`) to an output string of characters (e.g. `'13'`) and let the model figure out the meaning of the characters.

The model must learn not only the integer nature of the characters, but also the nature of the mathematical operation to perform.

In [7]:
from random import seed
from random import randint
from math import ceil
from math import log10

#### Data generation

In [24]:
# generate lists of random integers and their sum
def random_sum_pairs(n_examples, n_numbers, largest):
    """Build n_examples lists of n_numbers random integers plus their sums.

    Every integer is drawn with randint(1, largest). Returns (X, y) where X
    is a list of integer lists and y is the list of the corresponding sums.
    """
    X = [[randint(1, largest) for _ in range(n_numbers)]
         for _ in range(n_examples)]
    y = [sum(addends) for addends in X]
    return X, y

# convert data to strings
def to_string(X, y, n_numbers, largest):
    """Render addend lists and sums as fixed-width, left-padded strings.

    Each input pattern becomes its numbers joined by '+', and each sum becomes
    its decimal string. Both are padded on the left with spaces up to a width
    derived from n_numbers and largest; strings already at or beyond that
    width are left untouched.

    Returns (Xstr, ystr), two lists of strings.
    """
    # room for n_numbers operands plus the '+' separators between them
    in_width = n_numbers * ceil(log10(largest+1)) + n_numbers - 1
    Xstr = ['+'.join(str(n) for n in pattern).rjust(in_width) for pattern in X]
    # width reserved for the sum
    out_width = ceil(log10(n_numbers * (largest+1)))
    ystr = [str(total).rjust(out_width) for total in y]
    return Xstr, ystr

# integer encode strings
def integer_encode(X, y, alphabet):
    """Replace every character with its position in alphabet.

    X and y are iterables of strings; each string becomes a list of integer
    codes. A character missing from alphabet raises KeyError.

    Returns (Xenc, yenc), two lists of integer lists.
    """
    char_to_int = {symbol: code for code, symbol in enumerate(alphabet)}

    def encode(strings):
        return [[char_to_int[ch] for ch in text] for text in strings]

    return encode(X), encode(y)
 
# one hot encode
def one_hot_encode(X, y, max_int):
    """Expand integer-coded sequences into one-hot vectors of length max_int.

    X and y are iterables of integer sequences. Every integer becomes a list
    of max_int zeros with a single 1 at that integer's position.

    Returns (Xenc, yenc) as nested lists: sequence -> time step -> vector.
    """
    def to_vector(index):
        vector = [0] * max_int
        vector[index] = 1
        return vector

    def encode(sequences):
        return [[to_vector(index) for index in seq] for seq in sequences]

    return encode(X), encode(y)
 
# Walk a single sample through every encoding stage, printing the result of
# each step. X and y are rebound at every stage.
seed(1)
n_samples = 1
n_numbers = 2
largest = 10
# generate pairs
X, y = random_sum_pairs(n_samples, n_numbers, largest)
print(X, y)
# convert to strings
X, y = to_string(X, y, n_numbers, largest)
print(X, y)
# integer encode
alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', ' ']  # 12 symbols: digits, '+', and the padding space
X, y = integer_encode(X, y, alphabet)
print(X, y)
# one hot encode
X, y = one_hot_encode(X, y, len(alphabet))
print(X, y)  # X nests as (1, 5, 12) = (samples, time steps, features); y as (1, 2, 12)

[[3, 10]] [13]
[' 3+10'] ['13']
[[11, 3, 10, 1, 0]] [[1, 3]]
[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]] [[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]]]


In [53]:
# generate an encoded dataset
def generate_data(n_samples, n_numbers, largest, alphabet):
    """Produce one-hot encoded addition problems and answers as NumPy arrays.

    Chains random_sum_pairs -> to_string -> integer_encode -> one_hot_encode
    and converts both results with numpy.array.

    Returns a tuple (X, y).
    """
    pairs, sums = random_sum_pairs(n_samples, n_numbers, largest)
    in_text, out_text = to_string(pairs, sums, n_numbers, largest)
    in_codes, out_codes = integer_encode(in_text, out_text, alphabet)
    in_hot, out_hot = one_hot_encode(in_codes, out_codes, len(alphabet))
    return array(in_hot), array(out_hot)

In [27]:
# invert encoding
def invert(seq, alphabet):
    """Decode a sequence of score vectors back into a string.

    For each vector in seq the position of its largest entry (argmax) is
    looked up in alphabet, and the resulting characters are concatenated.
    """
    int_to_char = dict(enumerate(alphabet))
    return ''.join(int_to_char[argmax(pattern)] for pattern in seq)

#### Model

In [60]:
# problem configuration
n_samples = 1000  # samples generated per call to generate_data
n_numbers = 2  # addends per sample
largest = 10  # inclusive upper bound for each addend
alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', ' ']
n_chars = len(alphabet)  # one-hot vector length
# sequence lengths use the same formulas as to_string, so the model's
# input/output shapes match the padded strings it produces
n_in_seq_length = n_numbers * ceil(log10(largest+1)) + n_numbers - 1
n_out_seq_length = ceil(log10(n_numbers * (largest+1)))
# define LSTM configuration
n_batch = 10  # passed as batch_size to fit() and predict()
n_epoch = 30  # number of training iterations, each on freshly generated data

# create LSTM
# Encoder-decoder layout: the first LSTM reads the input sequence into one
# vector, RepeatVector copies that vector once per output time step, the
# second LSTM unrolls it, and a time-distributed softmax picks one character
# per step.
model = Sequential()
model.add(LSTM(100, input_shape=(n_in_seq_length, n_chars)))
model.add(RepeatVector(n_out_seq_length))
model.add(LSTM(50, return_sequences=True))
model.add(TimeDistributed(Dense(n_chars, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# would emit a stray "None" line after the table
model.summary()
# train LSTM
# A fresh encoded dataset is generated on every iteration and fitted for
# exactly one epoch; the iteration index is printed as a progress marker.
for i in range(n_epoch):
    X, y = generate_data(n_samples, n_numbers, largest, alphabet)
    print(i)
    model.fit(X, y, epochs=1, batch_size=n_batch)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_12 (LSTM)               (None, 100)               45200     
_________________________________________________________________
repeat_vector_4 (RepeatVecto (None, 2, 100)            0         
_________________________________________________________________
lstm_13 (LSTM)               (None, 2, 50)             30200     
_________________________________________________________________
time_distributed_5 (TimeDist (None, 2, 12)             612       
Total params: 76,012
Trainable params: 76,012
Non-trainable params: 0
_________________________________________________________________
None
0
Epoch 1/1
1
Epoch 1/1
2
Epoch 1/1
3
Epoch 1/1
4
Epoch 1/1
5
Epoch 1/1
6
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
10
Epoch 1/1
11
Epoch 1/1
12
Epoch 1/1
13
Epoch 1/1
14
Epoch 1/1
15
Epoch 1/1
16
Epoch 1/1
17
Epoch 1/1
18
Epoch 1/1
19
Epoch 1/1
20
Epoch 1/1
21

In [61]:
# evaluate on some new patterns
X, y = generate_data(n_samples, n_numbers, largest, alphabet)
result = model.predict(X, batch_size=n_batch, verbose=0)
# decode one-hot targets and predicted distributions back to strings
expected = [invert(x, alphabet) for x in y]
predicted = [invert(x, alphabet) for x in result]
# show some examples: at most 20, fewer when the evaluation set is smaller
# (a fixed range(20) would raise IndexError for n_samples < 20)
for i in range(min(20, len(expected))):
    print('Expected=%s, Predicted=%s' % (expected[i], predicted[i]))

Expected=11, Predicted=11
Expected= 9, Predicted= 9
Expected=15, Predicted=15
Expected= 4, Predicted= 4
Expected=19, Predicted=19
Expected= 8, Predicted= 8
Expected= 6, Predicted= 6
Expected=10, Predicted=10
Expected= 6, Predicted= 6
Expected=11, Predicted=11
Expected= 9, Predicted= 9
Expected= 6, Predicted= 6
Expected=13, Predicted=13
Expected=12, Predicted=12
Expected= 6, Predicted= 6
Expected=15, Predicted=15
Expected=10, Predicted=10
Expected=12, Predicted=12
Expected= 8, Predicted= 8
Expected= 8, Predicted= 8
