In [1]:
from random import seed
from random import randint
from numpy import array
from math import ceil
from math import log10
from math import sqrt
from numpy import argmax
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import RepeatVector

In [4]:
# generate lists of random integers and their sum
def random_sum_pairs(n_examples, n_numbers, largest):
    X, y = list(), list()
    for i in range(n_examples):
        in_pattern = [randint(1,largest) for _ in range(n_numbers)]
        print("in_pattern : ",in_pattern)
        out_pattern = sum(in_pattern)
        X.append(in_pattern)
        y.append(out_pattern)
    return X, y

seed(1)
n_examples = 1
n_numbers = 2
largest = 20
random_sum_pairs(n_examples, n_numbers, largest)

in_pattern :  [5, 19]


([[5, 19]], [24])

In [58]:
# convert data to strings
def to_string(X, y, n_numbers, largest):
    max_length = n_numbers * ceil(log10(largest+1)) + n_numbers - 1
    Xstr = list()
    for pattern in X:
        strp = '+' .join([str(n) for n in pattern])
        strp ='' .join(['' for _ in range(max_length-len(strp))]) + strp
        Xstr.append(strp) 
    max_length = ceil(log10(n_numbers * (largest+1)))
    ystr = list()
    for pattern in y:
        strp = str(pattern)
        strp = ''.join([' ' for _ in range(max_length-len(strp))]) + strp
        ystr.append(strp)
    return Xstr, ystr

c,b=random_sum_pairs(5, 2, 10)
to_string(c, b, 2,10)

(['2+5', '2+8', '8+8', '7+4', '2+8'], [' 7', '10', '16', '11', '10'])

In [59]:
# integer encode strings
def integer_encode(X, y, alphabet):
    char_to_int = dict((c, i) for i, c in enumerate(alphabet))
    Xenc = list()
    for pattern in X:
        integer_edencode = [char_to_int[char] for char in pattern]
        Xenc.append(integer_edencode)
    yenc = list()
    for pattern in y:
        integer_edencode = [char_to_int[char] for char in pattern]
        yenc.append(integer_edencode)
    return Xenc, yenc

In [60]:
seed(1)
n_samples = 1
n_numbers = 2
largest = 10
# generate pairs
X, y = random_sum_pairs(n_samples, n_numbers, largest)
print(X, y)
# convert to strings
X, y = to_string(X, y, n_numbers, largest)
print(X, y)
# integer encode
alphabet = ['0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' , '+' , '']
X, y = integer_encode(X, y, alphabet)
print(X, y)

[[3, 10]] [13]
['3+10'] ['13']
[[3, 10, 1, 0]] [[1, 3]]


In [61]:
# one hot encode
def one_hot_encode(X, y, max_int):
    Xenc = list()
    for seq in X:
        pattern = list()
        for index in seq:
            vector = [0 for _ in range(max_int)]
            vector[index] = 1
            pattern.append(vector)
        Xenc.append(pattern)
    yenc = list()
    for seq in y:
        pattern = list()
        for index in seq:
            vector = [0 for _ in range(max_int)]
            vector[index] = 1
            pattern.append(vector)
        yenc.append(pattern)
    return Xenc, yenc

In [62]:
seed(1)
n_samples = 1
n_numbers = 2
largest = 10
# generate pairs
X, y = random_sum_pairs(n_samples, n_numbers, largest)
print(X, y)
# convert to strings
X, y = to_string(X, y, n_numbers, largest)
print(X, y)
# integer encode
alphabet = [ '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' , '+' , '']
X, y = integer_encode(X, y, alphabet)
print(X, y)
# one hot encode
X, y = one_hot_encode(X, y, len(alphabet))
print(X, y)

[[3, 10]] [13]
['3+10'] ['13']
[[3, 10, 1, 0]] [[1, 3]]
[[[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]] [[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]]]


In [94]:
seed(1)
n_samples = 1
n_numbers = 2
largest = 10
alphabet = [ '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' , '+' , '']
# generate an encoded dataset
def generate_data(n_samples, n_numbers, largest, alphabet):
    # generate pairs
    X, y = random_sum_pairs(n_samples, n_numbers, largest)
    # convert to strings
    X, y = to_string(X, y, n_numbers, largest)
    # integer encode
    X, y = integer_encode(X, y, alphabet)
    # one hot encode
    X, y = one_hot_encode(X, y, len(alphabet))
    # return as NumPy arrays
    X, y = array(X), array(y)
    
    return X, y

X,y=generate_data(n_samples, n_numbers, largest, alphabet)
print(X.shape)
print(X.dtype)
print(y.shape)
print(y.dtype)

(1, 4, 12)
int32
(1, 2, 12)
int32


In [64]:
# invert encoding
def invert(seq, alphabet):
    int_to_char = dict((i, c) for i, c in enumerate(alphabet))
    strings = list()
    for pattern in seq:
        string = int_to_char[argmax(pattern)]
        strings.append(string)
    return ''.join(strings)

In [96]:
# configure problem

# number of math terms
n_terms = 3
# largest value for any single input digit
largest = 10
# scope of possible symbols for each input or output time step
alphabet = [str(x) for x in range(10)] + [ '+' ,' ']
# size of alphabet: (12 for 0-9, + and )
n_chars = len(alphabet)
# length of encoded input sequence (8 for 10+10+10)
n_in_seq_length = n_terms * ceil(log10(largest+1)) + n_terms - 1
# length of encoded output sequence (2 for 30 )
n_out_seq_length = ceil(log10(n_terms * (largest+1)))

In [97]:
# define LSTM
model = Sequential()
model.add(LSTM(75, input_shape=(n_in_seq_length, n_chars)))
model.add(RepeatVector(n_out_seq_length))
model.add(LSTM(50, return_sequences=True))
model.add(TimeDistributed(Dense(n_chars, activation= 'softmax' )))
model.compile(loss= 'categorical_crossentropy', optimizer= 'adam' , metrics=[ 'accuracy' ])
print(model.summary())

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_16 (LSTM)               (None, 75)                26400     
_________________________________________________________________
repeat_vector_8 (RepeatVecto (None, 2, 75)             0         
_________________________________________________________________
lstm_17 (LSTM)               (None, 2, 50)             25200     
_________________________________________________________________
time_distributed_8 (TimeDist (None, 2, 12)             612       
Total params: 52,212
Trainable params: 52,212
Non-trainable params: 0
_________________________________________________________________
None


In [98]:
# fit LSTM
X, y = generate_data(75000, n_terms,largest,alphabet) 

In [99]:
print(X.shape)
print(X.dtype)
print(y.shape)
print(y.dtype)

(75000,)
object
(75000, 2, 12)
int32


In [89]:
[print(i.shape, i.dtype) for i in model.inputs]
[print(o.shape, o.dtype) for o in model.outputs]
[print(l.name, l.input_shape, l.dtype) for l in model.layers]

(None, 8, 12) <dtype: 'float32'>
(None, 2, 12) <dtype: 'float32'>
lstm_14 (None, 8, 12) float32
repeat_vector_7 (None, 75) float32
lstm_15 (None, 2, 75) float32
time_distributed_7 (None, 2, 50) float32


[None, None, None, None]