# Training an RNN with Keras to add strings!

Given the string "54+7", the model should predict the sum as a string: "61" (emitted zero-padded to five characters, i.e. "00061").

In [None]:
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

## Generate Data


In [None]:
# Vocabulary: the ten digits plus '+'. A character's position in this string
# is the index used for it in the lookup tables built from it.
all_chars = '0123456789+'

In [None]:
# Size of the vocabulary, i.e. the width of every one-hot vector.
num_features = len(all_chars)

In [None]:
# Lookup table: character -> its position in all_chars.
char_to_indx = {char: indx for indx, char in enumerate(all_chars)}
char_to_indx

{'0': 0,
 '1': 1,
 '2': 2,
 '3': 3,
 '4': 4,
 '5': 5,
 '6': 6,
 '7': 7,
 '8': 8,
 '9': 9,
 '+': 10}

In [None]:
# Reverse lookup table: position in all_chars -> character.
indx_to_char = dict(enumerate(all_chars))
indx_to_char

{0: '0',
 1: '1',
 2: '2',
 3: '3',
 4: '4',
 5: '5',
 6: '6',
 7: '7',
 8: '8',
 9: '9',
 10: '+'}

In [None]:
def generate_data():
    """Return one random addition problem and its answer, both as strings.

    Each operand is drawn independently with np.random.randint(0, 100), so
    the problem looks like '35+88' and the answer like '123'.

    Returns:
        Tuple (example, label): the '<a>+<b>' text and the text of a + b.
    """
    left = np.random.randint(0, 100)
    right = np.random.randint(0, 100)
    return f'{left}+{right}', str(left + right)

generate_data()

('35+88', '123')

## Create the Model


In [None]:
# Width of both recurrent layers.
hidden_units = 128
max_time_steps = 5   # longest input, '99+99', is 5 characters; labels are padded to this length too

# Encoder/decoder-style stack (output shapes as reported by model.summary()):
model = Sequential([
    # Reads the one-hot input sequence and outputs a single vector: (None, 128).
    SimpleRNN(hidden_units, input_shape=(None, num_features)),
    # Repeats that vector once per output position: (None, 5, 128).
    RepeatVector(max_time_steps),
    # Emits one hidden vector per output position: (None, 5, 128).
    SimpleRNN(hidden_units, return_sequences=True),
    # Softmax over the vocabulary at every output position: (None, 5, 11).
    TimeDistributed(Dense(num_features, activation='softmax'))
])

# Targets are one-hot rows, hence categorical cross-entropy.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy']
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               17920     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


## Vectorize and De-Vectorize Data


In [None]:
def vectorized_example(example, label):
    """One-hot encode an addition problem and its answer.

    Each string is right-aligned in a (max_time_steps, num_features) array
    and the unused leading rows are filled with the character '0', so
    '64+7' is encoded as '064+7' and '71' as '00071'.

    Args:
        example: problem text such as '66+73'.
        label: answer text such as '139'.

    Returns:
        Tuple (x, y) of float arrays, each of shape
        (max_time_steps, num_features), with exactly one 1 per row.

    Raises:
        ValueError: if either string is longer than max_time_steps.
        KeyError: if a character is not a key of char_to_indx.
    """
    for name, text in (('example', example), ('label', label)):
        if len(text) > max_time_steps:
            # A negative pad would become negative row indices, which NumPy
            # silently wraps around, producing a corrupted encoding.
            raise ValueError(
                '{} {!r} has {} characters; at most {} are supported'.format(
                    name, text, len(text), max_time_steps))

    def _one_hot(text):
        # Right-align `text`, filling the leading rows with '0'.
        encoded = np.zeros((max_time_steps, num_features))
        pad = max_time_steps - len(text)
        encoded[:pad, char_to_indx['0']] = 1
        for row, char in enumerate(text, start=pad):
            encoded[row, char_to_indx[char]] = 1
        return encoded

    return _one_hot(example), _one_hot(label)

# Smoke test: encode one random problem and confirm both arrays come out as
# (max_time_steps, num_features). `x` is reused by the decoding check below.
e, l = generate_data()
print(e, l)
x, y = vectorized_example(e, l)
print(x.shape, y.shape)

66+73 139
(5, 11) (5, 11)


In [None]:
def devectorized_example(example):
    """Decode a sequence of per-step score vectors back into text.

    For every row of `example` the index of the largest entry is looked up
    in indx_to_char; the resulting characters are concatenated in order.
    Works for one-hot rows and for softmax outputs alike.
    """
    chars = (indx_to_char[np.argmax(row)] for row in example)
    return ''.join(chars)

# Round-trip check: decoding the encoded input should give back the
# (zero-padded) problem text.
devectorized_example(x)

'66+73'

## Create Dataset


In [None]:
def create_dataset(num_examples = 10000):
    """Build a batch of randomly generated, one-hot encoded problems.

    Args:
        num_examples: how many (problem, answer) pairs to generate.

    Returns:
        Tuple (x, y) of arrays, each of shape
        (num_examples, max_time_steps, num_features).
    """
    shape = (num_examples, max_time_steps, num_features)
    inputs, targets = np.zeros(shape), np.zeros(shape)

    for row in range(num_examples):
        inputs[row], targets[row] = vectorized_example(*generate_data())

    return inputs, targets
# Training data: x holds the encoded problems, y the encoded answers
# (10000 examples by default).
x, y = create_dataset()

## Training the Model


In [None]:
def _print_val_accuracy(epoch, logs):
    """Print the epoch's validation accuracy on one running line.

    The metric requested as 'accuracy' is reported as 'val_acc' by older
    tf.keras releases and as 'val_accuracy' by TensorFlow 2.x, so both keys
    are accepted. Nothing is printed if neither key is present.
    """
    logs = logs or {}
    val_accuracy = logs.get('val_accuracy', logs.get('val_acc'))
    if val_accuracy is not None:
        print('{:.2f}'.format(val_accuracy), end = ' _ ')

# Compact progress display: one validation-accuracy value per epoch.
l_cb = LambdaCallback(
    on_epoch_end = _print_val_accuracy
)

# Stop once val_loss has failed to improve for 10 consecutive epochs.
es_cb = EarlyStopping(
    monitor = 'val_loss',
    patience = 10
)

In [None]:
# Train with 20% of the data used for validation. `epochs` is only an upper
# bound: es_cb stops training once val_loss has not improved for 10 epochs.
# Per-epoch progress is printed by l_cb, so Keras' own logging is turned off.
model.fit(x, y, 
          epochs = 500, 
          batch_size = 256, 
          validation_split = 0.2,
          verbose = False,
          callbacks = [es_cb, l_cb]
         )

0.62 _ 0.63 _ 0.65 _ 0.66 _ 0.69 _ 0.72 _ 0.75 _ 0.76 _ 0.77 _ 0.76 _ 0.77 _ 0.79 _ 0.81 _ 0.82 _ 0.83 _ 0.87 _ 0.88 _ 0.92 _ 0.92 _ 0.91 _ 0.95 _ 0.96 _ 0.96 _ 0.97 _ 0.98 _ 0.97 _ 0.98 _ 0.98 _ 0.98 _ 0.99 _ 0.99 _ 0.98 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 0.96 _ 0.99 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 0.99 _ 0.99 _ 0.96 _ 0.99 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 

<tensorflow.python.keras.callbacks.History at 0x20b8f28c780>

In [None]:
# Spot-check the trained model on 20 freshly generated problems.
x_test, y_test = create_dataset(20)
preds = model.predict(x_test)

# Loop variables deliberately avoid the names `x` and `y`: those hold the
# training arrays, and overwriting them here would break a re-run of the
# training cell.
for x_vec, y_vec, pred in zip(x_test, y_test, preds):
    expected = devectorized_example(y_vec)
    predicted = devectorized_example(pred)
    # Green when every output character matches, red otherwise.
    color = 'green' if expected == predicted else 'red'
    out = 'Input: ' + devectorized_example(x_vec) + ', Out: ' + expected + ', Pred: ' + predicted
    print(colored(out, color))

[32mInput: 02+68, Out: 00070, Pred: 00070[0m
[32mInput: 00+45, Out: 00045, Pred: 00045[0m
[32mInput: 25+87, Out: 00112, Pred: 00112[0m
[32mInput: 66+40, Out: 00106, Pred: 00106[0m
[32mInput: 94+37, Out: 00131, Pred: 00131[0m
[32mInput: 21+63, Out: 00084, Pred: 00084[0m
[32mInput: 00+32, Out: 00032, Pred: 00032[0m
[32mInput: 79+32, Out: 00111, Pred: 00111[0m
[32mInput: 13+62, Out: 00075, Pred: 00075[0m
[32mInput: 77+76, Out: 00153, Pred: 00153[0m
[32mInput: 88+92, Out: 00180, Pred: 00180[0m
[32mInput: 90+89, Out: 00179, Pred: 00179[0m
[32mInput: 064+7, Out: 00071, Pred: 00071[0m
[32mInput: 96+77, Out: 00173, Pred: 00173[0m
[32mInput: 01+44, Out: 00045, Pred: 00045[0m
[32mInput: 050+9, Out: 00059, Pred: 00059[0m
[32mInput: 54+17, Out: 00071, Pred: 00071[0m
[32mInput: 55+49, Out: 00104, Pred: 00104[0m
[32mInput: 61+60, Out: 00121, Pred: 00121[0m
[32mInput: 32+34, Out: 00066, Pred: 00066[0m
