In [1]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

##### Generating dataset

In [78]:
TRAINING_SIZE = 50000
DIGITS = 3
REVERSE = False

MAX_QUESTION_LEN = DIGITS + 1 + DIGITS
MAX_ANSWER_LEN = DIGITS + 1

In [79]:
class CharacterTable:
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.c2i = dict((c, idx) for idx, c in enumerate(self.chars))
        self.i2c = dict((idx, c) for idx, c in enumerate(self.chars))

    def encode(self, math_str, seq_len):
        x = np.zeros((seq_len, len(self.chars)))
        for i, c in enumerate(math_str):
            x[i, self.c2i[c]] = 1

        return x
    
    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)

        return "".join(self.i2c[i] for i in x)

In [80]:
chars = "0123456789+ "
ctable = CharacterTable(chars)

In [81]:
questions = []
expected = []
seen = set()
print("Generating data...")
while len(questions) < TRAINING_SIZE:
    f = lambda: int(
        "".join(
            np.random.choice(list("0123456789"))
            for i in range(np.random.randint(1, DIGITS + 1))
        )
    )
    a, b = f(), f()
    # Skip any addition questions we've already seen
    # Also skip any such that x+Y == Y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = "{}+{}".format(a, b)
    query = q + " " * (MAX_QUESTION_LEN - len(q))
    ans = str(a + b)
    # Answers can be of maximum size DIGITS + 1.
    ans += " " * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print("Total questions:", len(questions))


Generating data...
Total questions: 50000


In [82]:
questions[:5]

['7+125  ', '7+662  ', '684+7  ', '29+69  ', '32+33  ']

In [83]:
expected[:5]

['132 ', '669 ', '691 ', '98  ', '65  ']

In [84]:
x = np.zeros((len(questions), MAX_QUESTION_LEN, len(chars)), dtype=np.bool)
y = np.zeros((len(questions), MAX_ANSWER_LEN, len(chars)), dtype=np.bool)

for i, sentence in enumerate(questions):
    x[i] = ctable.encode(math_str=sentence, seq_len=MAX_QUESTION_LEN)

for i, sentence in enumerate(expected):
    y[i] = ctable.encode(math_str=sentence, seq_len=MAX_ANSWER_LEN)

# shuffle
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]


split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)

(45000, 7, 12) (45000, 4, 12)
(5000, 7, 12) (5000, 4, 12)


##### Build model

In [48]:
num_layers = 3

model = keras.Sequential()
model.add(layers.LSTM(128, input_shape=(MAX_QUESTION_LEN, len(chars))))
model.add(layers.RepeatVector(MAX_ANSWER_LEN))
for _ in range(num_layers):
    model.add(layers.LSTM(128, return_sequences=True))
model.add(layers.Dense(len(chars), activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_10 (LSTM)               (None, 128)               72192     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_11 (LSTM)               (None, 4, 128)            131584    
_________________________________________________________________
lstm_12 (LSTM)               (None, 4, 128)            131584    
_________________________________________________________________
lstm_13 (LSTM)               (None, 4, 128)            131584    
_________________________________________________________________
dense_2 (Dense)              (None, 4, 12)             1548      
Total params: 468,492
Trainable params: 468,492
Non-trainable params: 0
________________________________________________

In [99]:
EPOCHS = 100
BATCH_SIZE = 1048

for epoch in range(1, EPOCHS):
    model.fit(
        x_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=1,
        validation_data=(x_val, y_val),
    )
    if epoch % 25 == 0:
        print(f"Epoch: {epoch}")
        for i in range(10):
            ind = np.random.randint(0, len(x_val))
            rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
            preds = np.argmax(model.predict(rowx), axis=-1)
    
            q = ctable.decode(rowx[0])
            correct = ctable.decode(rowy[0])
            guess = ctable.decode(preds[0], calc_argmax=False)
    
            print("Q", q[::-1] if REVERSE else q, end=" ")
            print("T", correct, end=" ")
            if correct == guess:
                print("☑ " + guess)
            else:
                print("☒ " + guess)
    

Epoch: 25
Q 750+395 T 1145 ☒ 1135
Q 124+139 T 263  ☑ 263 
Q 881+7   T 888  ☑ 888 
Q 56+91   T 147  ☑ 147 
Q 765+13  T 778  ☑ 778 
Q 888+524 T 1412 ☑ 1412
Q 167+983 T 1150 ☒ 1240
Q 628+422 T 1050 ☑ 1050
Q 73+49   T 122  ☑ 122 
Q 454+504 T 958  ☑ 958 
Epoch: 50
Q 795+95  T 890  ☒ 880 
Q 73+498  T 571  ☑ 571 
Q 380+474 T 854  ☑ 854 
Q 43+347  T 390  ☑ 390 
Q 6+286   T 292  ☑ 292 
Q 277+200 T 477  ☑ 477 
Q 835+59  T 894  ☒ 994 
Q 831+55  T 886  ☑ 886 
Q 861+69  T 930  ☑ 930 
Q 58+855  T 913  ☑ 913 
Epoch: 75
Q 280+7   T 287  ☑ 287 
Q 408+66  T 474  ☑ 474 
Q 307+355 T 662  ☑ 662 
Q 590+44  T 634  ☑ 634 
Q 31+242  T 273  ☑ 273 
Q 317+72  T 389  ☑ 389 
Q 476+546 T 1022 ☑ 1022
Q 33+979  T 1012 ☑ 1012
Q 1+14    T 15   ☑ 15  
Q 14+40   T 54   ☑ 54  


#### Reference:

1. https://keras.io/examples/nlp/addition_rnn/
