In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

In [2]:
# Problem-size configuration for the addition dataset.
TRAINING_SIZE = 50_000  # number of distinct addition problems to generate
DIGITS = 3              # maximum number of digits per operand
REVERSE = True          # when set, each padded query string is stored reversed
# Longest possible query is "<DIGITS digits>+<DIGITS digits>":
# two operands plus the single '+' character.
MAXLEN = 2 * DIGITS + 1

## Generate the data

In [3]:
class CharacterTable:
    """Two-way mapping between a character set and one-hot rows.

    The alphabet is the sorted list of unique characters in `chars`; a
    character's position in that list is its class index.
    """

    def __init__(self, chars: str):
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = {idx: ch for idx, ch in enumerate(self.chars)}

    def encode(self, C: str, num_rows: int) -> np.ndarray:
        """One-hot encode the string `C`.
        # Arguments
            C: String to encode; each character must belong to the alphabet
                (an unknown character raises `KeyError`).
            num_rows: Number of rows in the result. Rows past `len(C)`
                are left all-zero.
        # Returns
            A float array of shape `(num_rows, len(self.chars))` with a
            single 1 in row `i` at the class index of `C[i]`.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            one_hot[row, self.char_indices[ch]] = 1
        return one_hot

    def decode(self, x: np.ndarray, calc_argmax=True):
        """Turn model output or class indices back into a string.
        # Arguments
            x: A 2D array with one row of probabilities (or a one-hot row)
                per character; or, with `calc_argmax=False`, a sequence of
                character indices.
            calc_argmax: Whether to take the argmax over the last axis of
                `x` to obtain the indices first, defaults to `True`.
        # Returns
            The string formed by looking up each index in the alphabet.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[idx] for idx in indices)

In [4]:
# Alphabet: the ten digits, the '+' operator, and the space used as padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)

# Accumulators filled by the data-generation loop.
questions, expected = [], []
seen = set()

In [5]:
def f():
    """Draw a random non-negative integer of at most DIGITS digits.

    A digit count between 1 and DIGITS (inclusive) is drawn first, then that
    many characters are drawn one at a time from '0'-'9'. Leading zeros are
    possible, so the returned value may have fewer significant digits than
    the drawn count.
    """
    digit_pool = list('0123456789')
    digit_count = np.random.randint(1, DIGITS + 1)
    drawn = [np.random.choice(digit_pool) for _ in range(digit_count)]
    return int(''.join(drawn))

# Keep sampling operand pairs until TRAINING_SIZE distinct problems exist.
while len(questions) < TRAINING_SIZE:
    a, b = f(), f()
    # a+b and b+a count as the same problem, so dedupe on the sorted pair.
    key = tuple(sorted((a, b)))
    if key not in seen:
        seen.add(key)
        q = f'{a}+{b}'
        # Right-pad with spaces to fixed widths: MAXLEN for the query and
        # DIGITS + 1 for the answer.
        query = q.ljust(MAXLEN)
        ans = str(a + b).ljust(DIGITS + 1)
        if REVERSE:
            query = query[::-1]
        questions.append(query)
        expected.append(ans)
print(f'Total questions: {len(questions)}')

Total questions: 50000


## Vectorize the data

In [6]:
# One-hot tensors indexed as (sample, character position, character class).
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
# encode() returns a float 0/1 matrix; assigning it into the bool arrays
# casts each entry to False/True.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

# Shuffle x and y with one shared permutation so every question stays
# aligned with its answer.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the trailing tenth of the shuffled samples for validation.
split_at = len(x) - len(x) // 10
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

print("Training Data:", x_train.shape, y_train.shape, sep="\n")

print("Validation Data:", x_val.shape, y_val.shape, sep="\n")

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [7]:
num_layers = 1  # number of stacked sequence-returning LSTM layers after RepeatVector

model = tf.keras.Sequential([
    # Reads the (MAXLEN, len(chars)) one-hot query and emits one 128-wide vector.
    layers.LSTM(128, input_shape=(MAXLEN, len(chars))),
    # Repeats that vector once per output position (DIGITS + 1 of them).
    layers.RepeatVector(DIGITS + 1),
    # LSTM layer(s) that return the full sequence, one output per position.
    *(layers.LSTM(128, return_sequences=True) for _ in range(num_layers)),
    # Softmax over the character classes at every output position.
    layers.Dense(len(chars), activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               72192     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 4, 128)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
dense (Dense)                (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


## Train the model

In [8]:
epochs = 30
batch_size = 32

# Train one epoch at a time so a few validation predictions can be printed
# after every pass. The upper bound is epochs + 1 because range() excludes
# it; range(1, epochs) would run only epochs - 1 rounds.
for epoch in range(1, epochs + 1):
    # Each fit() call below reports itself as "Epoch 1/1", so label the
    # overall round explicitly.
    print(f"Iteration {epoch}/{epochs}")
    model.fit(x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_val, y_val))
    # Spot-check 10 randomly chosen validation samples.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a length-1 array to keep the leading batch dimension
        # that model.predict() receives.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = np.argmax(model.predict(rowx), axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Queries are stored reversed when REVERSE is set; undo that for display.
        print("Q", q[::-1] if REVERSE else q, end=" ")
        print("T", correct, end=" ")
        if correct == guess:
            print("☑ " + guess)
        else:
            print("☒ " + guess)

Q 58+377  T 435  ☒ 355 
Q 440+6   T 446  ☒ 454 
Q 880+204 T 1084 ☒ 1010
Q 77+36   T 113  ☒ 13  
Q 76+341  T 417  ☒ 451 
Q 6+559   T 565  ☒ 555 
Q 25+444  T 469  ☒ 455 
Q 2+669   T 671  ☒ 664 
Q 79+85   T 164  ☒ 185 
Q 310+7   T 317  ☒ 133 
Q 674+517 T 1191 ☒ 1102
Q 6+559   T 565  ☒ 552 
Q 567+72  T 639  ☒ 642 
Q 780+20  T 800  ☒ 792 
Q 512+81  T 593  ☒ 592 
Q 9+141   T 150  ☒ 147 
Q 1+978   T 979  ☒ 992 
Q 647+57  T 704  ☒ 692 
Q 128+287 T 415  ☒ 492 
Q 91+543  T 634  ☒ 612 
Q 15+542  T 557  ☒ 559 
Q 453+13  T 466  ☒ 458 
Q 422+8   T 430  ☒ 429 
Q 162+803 T 965  ☒ 952 
Q 980+6   T 986  ☒ 988 
Q 1+542   T 543  ☒ 546 
Q 974+934 T 1908 ☒ 1816
Q 167+96  T 263  ☒ 262 
Q 205+3   T 208  ☒ 210 
Q 487+353 T 840  ☒ 842 
Q 145+44  T 189  ☒ 187 
Q 26+19   T 45   ☒ 42  
Q 73+49   T 122  ☒ 132 
Q 476+697 T 1173 ☒ 1177
Q 79+2    T 81   ☒ 82  
Q 972+98  T 1070 ☒ 1073
Q 673+715 T 1388 ☒ 1380
Q 34+23   T 57   ☑ 57  
Q 398+5   T 403  ☒ 407 
Q 212+13  T 225  ☒ 227 
Q 565+105 T 670  ☒ 679 
Q 784+262 T 1046