# Introduction 

Example: 
- Input: "535+61"
- Output: "596"

# Setup 

In [1]:
from tensorflow import keras 
from tensorflow.keras import layers 
import numpy as np 

# parametrs for model and datset 
TRAINING_SIZE = 50_000 
DIGITS = 3 
REVERSE = True 

# max len of input 
MAXLEN = DIGITS + 1 + DIGITS 

# Generate the Data

In [3]:
class CharacterTable: 
    '''
        Given a set of characters: 
        + encode them to a one-hot integer representation 
        + decode the one-hot or integer repreesntation to thier character output 
        + decode a vector of probabilities to their character output 
    '''
    def __init__(self, chars) -> None:
        '''
        # Arguments
            chars: Characters that can appear in the input. 
        '''
        
        self.chars = sorted(set(chars))

        self.char_indicies = dict((c,i) for i, c in enumerate(self.chars))
        self.indicies_char = dict((i,c) for i, c in enumerate(self.chars))

    def encode(self, C, num_rows):
        '''
        # Arguments 
            C: string, to be encoded 
            num_rows: Number of rows in the returned one-ot encoding. 
                This is used to keep the # of rows for each data the same. 
        '''
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C): 
            x[i, self.char_indicies[c]] = 1 
        return x 
    
    def decode(self, x, calc_argmax=True):
        '''
        # Arguments 
            x: A vector or 2D array of probabilities or one-hot representation;
                or a vector of character indicies
            calc_argmax: whether to find the character index with maximum 
                probability, defaluts to `True`. 
        '''
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indicies_char[x] for x in x)


In [4]:
chars = '0123456789+ ' 
ctable = CharacterTable(chars)

questions = [] 
expected = [] 
seen = set() 
print('Generating Data...') 
while len(questions) < TRAINING_SIZE:
    f = lambda: int(
        ''.join(
            np.random.choice(list('0123456789'))
            for i in range(np.random.randint(1,DIGITS+1))
        )
    )
    a, b = f(), f() # generate two `int` numbers 

    # skip any addition questions we've see 
    # also skip x+y == y+x 
    key = tuple(sorted((a,b)))

    if key in seen: 
        continue 
    seen.add(key) 

    q = '{}+{}'.format(a,b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a+b)
    # answer can be of max size DIGITS + 1
    ans += ' ' * (DIGITS + 1 - len(ans))

    if REVERSE: 
        query = query[::-1]
    
    questions.append(query)
    expected.append(ans) 

print('Total questions:', len(questions))

Generating Data...
Total questions: 50000


# Vectorize the Data 

In [8]:
from tqdm import tqdm 

In [9]:
print('Vectorizing...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool) 
y = np.zeros((len(questions), DIGITS+1, len(chars)), dtype=np.bool) 

for i, sentence in  tqdm(enumerate(questions)):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in tqdm(enumerate(expected)):
    y[i] = ctable.encode(sentence, DIGITS+1)

Vectorizing...


50000it [00:00, 192309.82it/s]
50000it [00:00, 271746.15it/s]


In [11]:
# shuffle data 
indicies = np.arange(len(y))
np.random.shuffle(indicies)
x = x[indicies]
y = y[indicies]


In [27]:

# 10% for validation 
split_at = len(x) - len(x) // 10
print('splitting at', split_at)
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

splitting at 45000


In [28]:
print('Training Data:', x_train.shape, y_train.shape)
print('Training Data:', x_val.shape, y_val.shape)

Training Data: (45000, 7, 12) (45000, 4, 12)
Training Data: (5000, 7, 12) (5000, 4, 12)


# Building the Model 

In [29]:
num_layers = 1 # 2 
model = keras.Sequential() 
model.add(layers.LSTM(128, input_shape=(MAXLEN, len(chars)))) # output size = 128
model.add(layers.RepeatVector(DIGITS+1))

for _ in range(num_layers):
    # return_sequeces: returns all the outputs so far in the form of (num_samples, timesteps, output_dim)
    model.add(layers.LSTM(128, return_sequences=True)) 

model.add(layers.Dense(len(chars), activation='softmax'))


In [30]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
dense_2 (Dense)              (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [31]:
epochs = 30 
batch_size = 32

for epoch in range(1, epochs): 
    print()
    print('Iteration:', epoch)
    model.fit(
        x_train, 
        y_train, 
        batch_size=batch_size, 
        epochs=1, 
        validation_data=(x_val, y_val)
    )
    print('visualizing data:')
    # select 10 samples form validation set at random to visualize errors 
    for i in range(10): 
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = np.argmax(model.predict(rowx),axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess: 
            print('[^]', guess)
        else: 
            print('[x]', guess)


Iteration: 1
visualizing data:
Q 972+0   T 972  [x] 900 
Q 327+952 T 1279 [x] 1326
Q 465+139 T 604  [x] 616 
Q 5+633   T 638  [x] 556 
Q 20+700  T 720  [x] 218 
Q 21+66   T 87   [x] 12  
Q 239+212 T 451  [x] 566 
Q 6+981   T 987  [x] 900 
Q 88+61   T 149  [x] 101 
Q 7+89    T 96   [x] 180 

Iteration: 2
visualizing data:
Q 84+857  T 941  [x] 933 
Q 244+87  T 331  [x] 326 
Q 978+50  T 1028 [x] 1046
Q 358+205 T 563  [x] 634 
Q 180+662 T 842  [x] 864 
Q 790+428 T 1218 [x] 1211
Q 819+636 T 1455 [x] 1377
Q 174+661 T 835  [x] 864 
Q 820+682 T 1502 [x] 1476
Q 92+135  T 227  [x] 216 

Iteration: 3
visualizing data:
Q 8+313   T 321  [x] 323 
Q 42+262  T 304  [x] 300 
Q 987+66  T 1053 [x] 1050
Q 257+637 T 894  [x] 800 
Q 71+372  T 443  [x] 441 
Q 68+146  T 214  [x] 213 
Q 454+182 T 636  [x] 735 
Q 980+23  T 1003 [x] 1001
Q 558+6   T 564  [x] 563 
Q 63+136  T 199  [x] 203 

Iteration: 4
visualizing data:
Q 973+24  T 997  [x] 990 
Q 55+47   T 102  [x] 105 
Q 8+973   T 981  [x] 976 
Q 552+659 T 12

In [33]:
model.evaluate(x_train, y_train)
model.evaluate(x_val, y_val)



[0.004691528156399727, 0.9991999864578247]

In [34]:
model.save('./models/lstm_digit_addition')



INFO:tensorflow:Assets written to: ./models/lstm_digit_addition\assets


INFO:tensorflow:Assets written to: ./models/lstm_digit_addition\assets
