In [1]:
from random import seed
import os
from random import randint
import matplotlib.pyplot as plt

from numpy import array
from math import ceil
from math import log10
from numpy import argmax
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import RepeatVector

In [2]:
# build random operand lists together with the sum of each list
def random_sum_pairs(n_examples, n_numbers, lowest, largest):
    """Draw random integer operand lists and compute the sum of each one.

    Args:
        n_examples: number of operand lists to generate.
        n_numbers: how many integers each operand list holds.
        lowest: smallest value that may be drawn (inclusive).
        largest: biggest value that may be drawn (inclusive).

    Returns:
        A tuple (X, y) where X is a list of n_examples lists of n_numbers
        ints and y is the list of the corresponding sums.
    """
    # Operands are drawn example by example, left to right.
    operands = [
        [randint(lowest, largest) for _ in range(n_numbers)]
        for _ in range(n_examples)
    ]
    totals = [sum(row) for row in operands]
    return operands, totals

In [3]:
# convert data to strings
def to_string(X, y, n_numbers, largest):
    """Render operand lists and their results as right-aligned strings.

    For every operand list a coin is flipped: the list is written either as
    an addition ("a+b") or as a chained subtraction ("a-b"). For subtractions
    the result is derived from the supplied sum, using
    a - b - c ... == (a + b + c ...) - 2 * (b + c ...), so the label always
    matches the rendered expression regardless of the number of operands.

    Args:
        X: list of operand lists (ints), n_numbers values each.
        y: list with the sum of each operand list. It is not modified.
        n_numbers: number of operands per expression.
        largest: biggest operand value; used to size the padding widths.

    Returns:
        A tuple (Xstr, ystr) of string lists. Expressions are left-padded
        with spaces to n_numbers * digits(largest) + n_numbers - 1
        characters, results to ceil(log10(n_numbers * (largest + 1)))
        characters. A string that is already longer than its width is
        returned as is (never truncated).

    NOTE(review): the result width is sized for the largest possible sum; a
    negative result needing more characters than that would come back
    unpadded and longer than the others - confirm the operand range in use
    keeps subtraction results within the width.
    """
    # Work on a copy so the caller's result list is left untouched.
    results = list(y)

    # Widest expression: n_numbers operands with the maximal digit count plus
    # the operator characters between them.
    in_width = n_numbers * ceil(log10(largest + 1)) + n_numbers - 1
    Xstr = []
    for position, pattern in enumerate(X):
        # Fair coin flip that does not depend on any notebook-level global.
        if randint(0, 1) == 1:
            operator = '-'
            # Turn the stored sum into the value of the chained subtraction.
            results[position] = results[position] - 2 * sum(pattern[1:])
        else:
            operator = '+'
        text = operator.join(str(n) for n in pattern)
        Xstr.append(text.rjust(in_width))

    out_width = ceil(log10(n_numbers * (largest + 1)))
    ystr = [str(value).rjust(out_width) for value in results]
    return Xstr, ystr

In [4]:
# integer encode strings
def integer_encode(X, y, alphabet):
    """Replace every character of the given strings by its alphabet index.

    Args:
        X: iterable of input strings.
        y: iterable of output strings.
        alphabet: sequence of characters; a character's position in it is
            the integer it is encoded to.

    Returns:
        A tuple (Xenc, yenc) of lists, each holding one list of ints per
        input string.

    Raises:
        KeyError: if a string contains a character missing from alphabet.
    """
    index_of = {symbol: position for position, symbol in enumerate(alphabet)}

    def encode_all(texts):
        # One list of indices per string, characters kept in order.
        return [[index_of[symbol] for symbol in text] for text in texts]

    return encode_all(X), encode_all(y)

In [5]:
# one hot encode
def one_hot_encode(X, y, max_int):
    """Turn integer-encoded sequences into sequences of one-hot vectors.

    Args:
        X: iterable of integer sequences (inputs).
        y: iterable of integer sequences (outputs).
        max_int: length of every one-hot vector.

    Returns:
        A tuple (Xenc, yenc); each element is a list with, per sequence, a
        list of max_int-long 0/1 lists carrying a single 1 at the encoded
        index.

    Raises:
        IndexError: if an index does not fit in a vector of length max_int.
    """
    def encode_sequence(indices):
        rows = []
        for hot in indices:
            row = [0] * max_int
            row[hot] = 1
            rows.append(row)
        return rows

    encoded_inputs = [encode_sequence(sequence) for sequence in X]
    encoded_outputs = [encode_sequence(sequence) for sequence in y]
    return encoded_inputs, encoded_outputs

In [6]:
# generate an encoded dataset
def generate_data(n_samples, n_numbers, lowest, largest, alphabet):
    """Create a fresh random dataset, fully encoded for the network.

    The stages are chained in this order: random operand lists and sums,
    text rendering, integer encoding, one-hot encoding, conversion to numpy.

    Args:
        n_samples: number of examples to generate.
        n_numbers: operands per example.
        lowest: smallest operand value (inclusive).
        largest: biggest operand value (inclusive).
        alphabet: sequence of characters used for the encodings.

    Returns:
        A tuple (X, y) of numpy arrays built from the one-hot encoded inputs
        and outputs.
    """
    operands, results = random_sum_pairs(n_samples, n_numbers, lowest, largest)
    expr_text, result_text = to_string(operands, results, n_numbers, largest)
    expr_ids, result_ids = integer_encode(expr_text, result_text, alphabet)
    expr_hot, result_hot = one_hot_encode(expr_ids, result_ids, len(alphabet))
    return array(expr_hot), array(result_hot)

In [7]:
# invert encoding
def invert(seq, alphabet):
    """Decode a sequence of score / one-hot vectors back into a string.

    Args:
        seq: iterable of vectors; the position of each vector's maximum
            selects the character.
        alphabet: sequence of characters indexed by position.

    Returns:
        The decoded characters joined into a single string.
    """
    symbol_at = dict(enumerate(alphabet))
    return ''.join(symbol_at[argmax(step)] for step in seq)

In [8]:
# define dataset
# seed() is random.seed, so it makes the randint-based data generation repeatable
seed(1)
print("Configuring the dataset")
#Samples to be created for the dataset
n_samples = 5000
#number of operands
n_numbers = 2
#lowest number in operation
lowest = 1
#largest number in operation
largest = 1000
#text alphabet (decimal digits, +, -, and the space used for padding)
alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-', ' ']
n_chars = len(alphabet)
print("Train set items:"+str(n_samples))
print("Operands:"+str(n_numbers))
print("Numbers range: ["+str(lowest)+","+str(largest)+"]")
#computing text max length: digits of every operand plus the operators between them
n_in_seq_length = n_numbers * ceil(log10(largest + 1)) + n_numbers - 1
#computing result max length
n_out_seq_length = ceil(log10(n_numbers * (largest + 1)))
print("Input Length:"+str(n_in_seq_length))
print("Output Length:"+str(n_out_seq_length))
print("Configuring the model")
# define LSTM configuration
# n_batch is the batch size handed to fit/predict
n_batch = 10
# n_epoch is the number of training rounds (one freshly generated dataset per round)
n_epoch = 10
# nr_epoch is the number of epochs run on each round's dataset
nr_epoch = 10
print("Batch size:"+str(n_batch))
print("Number of rounds:"+str(n_epoch))
print("Epochs per round:"+str(nr_epoch))

Configuring the dataset
Train set items::5000
Operands::2
Numbers range: [1,1000
Input Length:9
Output Length:4
Configuring the model
Number of batches:10
Number of rounds:10


In [9]:
# create LSTM
print("Creating the model")
model = Sequential()
#Adding the input LSTM layer (reads the one-hot encoded expression)
# NOTE(review): both LSTM widths are derived from `largest`, the operand range
# (2000 and 1000 units with the current settings) - confirm this coupling is intended.
model.add(LSTM(2*largest, input_shape=(n_in_seq_length, n_chars)))
#Repeating the encoded expression once per output character
model.add(RepeatVector(n_out_seq_length))
#Adding the output LSTM layer, emitting one vector per output character
model.add(LSTM(largest, return_sequences=True))
#Adding Dense layer for output (softmax over the alphabet at every position)
model.add(TimeDistributed(Dense(n_chars, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be wrapped in print()
model.summary()
print("Training the model / "+str(n_epoch)+" rounds of "+str(nr_epoch)+" epochs each (each round concerns one different dataset)")
#training the model in n_epoch round of nr_epoch epochs each
for i in range(n_epoch):
    # a new random dataset is generated for every round
    X, y = generate_data(n_samples, n_numbers, lowest, largest, alphabet)
    # labelled "Round" so it is not confused with the "Epoch k/n" lines printed by fit()
    print("Round "+str(i+1)+"/"+str(n_epoch))
    # history is overwritten on every round, so it ends up holding the last round only
    history = model.fit(X, y, epochs=nr_epoch, batch_size=n_batch, verbose=2)

Creating the model
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 2000)              16112000  
                                                                 
 repeat_vector (RepeatVector  (None, 4, 2000)          0         
 )                                                               
                                                                 
 lstm_1 (LSTM)               (None, 4, 1000)           12004000  
                                                                 
 time_distributed (TimeDistr  (None, 4, 13)            13013     
 ibuted)                                                         
                                                                 
Total params: 28,129,013
Trainable params: 28,129,013
Non-trainable params: 0
_________________________________________________________________
None
Training the model /

Epoch 2/10
500/500 - 181s - loss: 0.0888 - accuracy: 0.9737 - 181s/epoch - 361ms/step
Epoch 3/10
500/500 - 182s - loss: 0.0430 - accuracy: 0.9906 - 182s/epoch - 363ms/step
Epoch 4/10
500/500 - 183s - loss: 0.0258 - accuracy: 0.9955 - 183s/epoch - 365ms/step
Epoch 5/10
500/500 - 183s - loss: 0.0734 - accuracy: 0.9790 - 183s/epoch - 366ms/step
Epoch 6/10
500/500 - 184s - loss: 0.0633 - accuracy: 0.9802 - 184s/epoch - 368ms/step
Epoch 7/10
500/500 - 186s - loss: 0.0513 - accuracy: 0.9843 - 186s/epoch - 373ms/step
Epoch 8/10
500/500 - 213s - loss: 0.0602 - accuracy: 0.9823 - 213s/epoch - 426ms/step
Epoch 9/10
500/500 - 199s - loss: 0.0383 - accuracy: 0.9888 - 199s/epoch - 397ms/step
Epoch 10/10
500/500 - 190s - loss: 0.0158 - accuracy: 0.9969 - 190s/epoch - 379ms/step
Epoch: 9:Epoch 1/10
500/500 - 197s - loss: 0.2516 - accuracy: 0.9138 - 197s/epoch - 395ms/step
Epoch 2/10
500/500 - 206s - loss: 0.0817 - accuracy: 0.9734 - 206s/epoch - 413ms/step
Epoch 3/10
500/500 - 195s - loss: 0.0295 - a

In [12]:
#Create outputData folder
outdir = './OutputData'
# makedirs with exist_ok=True does nothing when the folder is already there,
# which avoids the separate exists() check
os.makedirs(outdir, exist_ok=True)
#store model in outputdata folder (path built from outdir so both stay in sync)
model.save(os.path.join(outdir, "ModelLSTM.h5"))

In [21]:
# evaluate on some new patterns: a fresh random dataset built with the same
# arguments as the training rounds
X, y = generate_data(n_samples, n_numbers, lowest, largest, alphabet)

# run the model on the fresh inputs
result = model.predict(X, batch_size=n_batch, verbose=0)
# decode targets and predictions back to text so they can be compared
# (these lines only decode; no error figure is computed here)
expected = [invert(x, alphabet) for x in y]
predicted = [invert(x, alphabet) for x in result]