In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored
# Report the TensorFlow version this notebook was executed with.
print('Using tensorflow version:', tf.__version__)

Using tensorflow version: 2.2.0


In [2]:
# Vocabulary: the ten digits followed by the two operator symbols.
all_chars = '0123456789+-'

In [3]:
# One one-hot feature per character of the vocabulary.
num_features = len(all_chars)

# Forward and reverse lookup tables between a character and its one-hot index.
char_to_index = {char: idx for idx, char in enumerate(all_chars)}
index_to_char = dict(enumerate(all_chars))

print('Number of features:', num_features)

Number of features: 12


In [4]:
def generate_data():
    """Generate one random arithmetic problem and its answer as strings.

    Two operands are drawn uniformly from 0..99 and combined with either
    '+' or '-', each operator being chosen with probability 1/2.

    Returns:
        tuple[str, str]: ``(example, label)``, e.g. ``('99-64', '35')``.
        The label of a subtraction may be negative, e.g. ``('6-45', '-39')``.
    """
    first_num = np.random.randint(low=0, high=100)
    second_num = np.random.randint(low=0, high=100)
    # randint draws from 0..99, so `>= 50` splits the range exactly in half.
    # (`> 50` picked only 49 of the 100 values, biasing toward subtraction.)
    add = np.random.randint(low=0, high=100) >= 50
    if add:
        operator, result = '+', first_num + second_num
    else:
        operator, result = '-', first_num - second_num
    example = str(first_num) + operator + str(second_num)
    label = str(result)
    return example, label

# Draw one sample to show the (example, label) string format.
generate_data()

('99-64', '35')

In [5]:
# Size of the recurrent state used by both SimpleRNN layers.
hidden_units = 128
# Fixed sequence length; the longest generated input ('99+99') has 5 characters.
max_time_steps = 5

model = Sequential()
# Encoder: read the whole one-hot input sequence into a single state vector.
model.add(SimpleRNN(hidden_units, input_shape=(None, num_features)))
# Present that vector once per output time step.
model.add(RepeatVector(max_time_steps))
# Decoder: emit one hidden state per output position.
model.add(SimpleRNN(hidden_units, return_sequences=True))
# Per-position softmax over the character vocabulary.
model.add(TimeDistributed(Dense(num_features, activation='softmax')))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               18048     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 12)             1548      
Total params: 52,492
Trainable params: 52,492
Non-trainable params: 0
_________________________________________________________________


In [6]:
def _one_hot_padded(text):
    """One-hot encode `text`, left-padded with '0' to `max_time_steps` rows."""
    if len(text) > max_time_steps:
        # A longer string would make the pad offset negative, so the indices
        # would wrap around and silently corrupt the encoding.
        raise ValueError(
            'Sequence {!r} is longer than max_time_steps={}'.format(text, max_time_steps)
        )
    encoded = np.zeros((max_time_steps, num_features))
    for step, char in enumerate(text.rjust(max_time_steps, '0')):
        encoded[step, char_to_index[char]] = 1
    return encoded


def vectorize_example(example, label):
    """One-hot encode a text example and its label.

    Both strings are right-aligned and left-padded with the character '0'
    so that they occupy exactly `max_time_steps` time steps.

    Args:
        example: Input expression, e.g. ``'99-34'``.
        label: Expected result as text, e.g. ``'65'``.

    Returns:
        tuple: ``(x, y)``, two arrays of shape ``(max_time_steps, num_features)``.

    Raises:
        ValueError: If either string is longer than `max_time_steps`.
        KeyError: If a character is not part of the vocabulary.
    """
    return _one_hot_padded(example), _one_hot_padded(label)

# Sanity check: encode one freshly generated sample and print the shapes of
# the resulting input and label arrays.
e, l = generate_data()
print('Text Example and Label:', e, l)
x, y = vectorize_example(e, l)
print('Vectorized Example and Label Shapes:', x.shape, y.shape)

Text Example and Label: 99-34 65
Vectorized Example and Label Shapes: (5, 12) (5, 12)


In [7]:
def devectorize_example(example):
    """Decode a sequence of per-step score vectors back into text.

    For every time step the index of the largest entry is looked up in
    `index_to_char`; the resulting characters are concatenated. Padding
    characters are kept.
    """
    chars = []
    for vec in example:
        chars.append(index_to_char[np.argmax(vec)])
    return ''.join(chars)

def strip_padding(example):
    """Remove the '0' padding (leading zeros) from a decoded string.

    Leading zeros of every number are dropped, e.g. ``'00065' -> '65'`` and
    ``'01-50' -> '1-50'``. If the final number consists only of zeros, a
    single ``'0'`` is kept so that a zero value does not vanish
    (``'00000' -> '0'``, ``'005+0' -> '5+0'``).

    Zeros directly in front of an operator are always treated as padding,
    because a padded negative label (``'000-5'`` for ``'-5'``) cannot be
    told apart from a zero first operand.

    Args:
        example: Padded text as produced by decoding a one-hot sequence.

    Returns:
        str: The text without padding.
    """
    output = ''
    in_number = False     # a digit of the current number has been emitted
    dropped_zero = False  # zeros of the current number were skipped
    for c in example:
        if c in ('+', '-'):
            # An operator starts a new number; zeros skipped so far were padding.
            output += c
            in_number = False
            dropped_zero = False
        elif c == '0' and not in_number:
            dropped_zero = True
        else:
            output += c
            in_number = True
    if dropped_zero and not in_number:
        # The last number was made up of zeros only: keep one of them.
        output += '0'
    return output

# Decode the encoded sample input `x` back to text.
devectorize_example(x)

'99-34'

In [8]:
# Show the decoded label `y` with and without its left padding.
print(devectorize_example(y), ':', strip_padding(devectorize_example(y)))

00065 : 65


In [9]:
def create_dataset(num_examples=2000):
    """Build a randomly generated, one-hot encoded dataset.

    Args:
        num_examples: Number of (example, label) pairs to generate.

    Returns:
        tuple: ``(x_train, y_train)``, each of shape
        ``(num_examples, max_time_steps, num_features)``.
    """
    shape = (num_examples, max_time_steps, num_features)
    x_train = np.zeros(shape)
    y_train = np.zeros(shape)

    for row in range(num_examples):
        text, answer = generate_data()
        x_train[row], y_train[row] = vectorize_example(text, answer)

    return x_train, y_train

# Generate the training data and print the shapes of the two arrays.
x_train, y_train = create_dataset(40000)
print(x_train.shape, y_train.shape)

(40000, 5, 12) (40000, 5, 12)


In [10]:
# Decode the first training input back to (padded) text.
devectorize_example(x_train[0])

'01-50'

In [11]:
# Decode the first training label back to (padded) text.
devectorize_example(y_train[0])

'00-49'

In [12]:
def _print_val_accuracy(epoch, logs):
    """Print the epoch's validation accuracy on a single running line."""
    print('{:.2f}'.format(logs['val_accuracy']), end=' _ ')


simple_logger = LambdaCallback(on_epoch_end=_print_val_accuracy)
# Stop once the validation loss has not improved for 10 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# Hold out the last 20% of the data for validation; Keras' own progress
# output is disabled because `simple_logger` reports each epoch instead.
model.fit(
    x_train,
    y_train,
    batch_size=1024,
    epochs=100,
    validation_split=0.2,
    verbose=False,
    callbacks=[simple_logger, early_stopping],
)

0.61 _ 0.63 _ 0.65 _ 0.66 _ 0.68 _ 0.70 _ 0.72 _ 0.73 _ 0.75 _ 0.76 _ 0.78 _ 0.79 _ 0.79 _ 0.79 _ 0.82 _ 0.85 _ 0.87 _ 0.89 _ 0.91 _ 0.92 _ 0.93 _ 0.95 _ 0.95 _ 0.96 _ 0.96 _ 0.97 _ 0.97 _ 0.97 _ 0.98 _ 0.98 _ 0.99 _ 0.98 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 0.99 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 0.89 _ 0.99 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 1.00 _ 

<tensorflow.python.keras.callbacks.History at 0x7fe7a0426f60>

Create a test set and look at some predictions:

In [13]:
# Evaluate on freshly generated samples.
x_test, y_test = create_dataset(num_examples=1000)
preds = model.predict(x_test)
full_seq_acc = 0

for pred, target, source in zip(preds, y_test, x_test):
    pred_str = strip_padding(devectorize_example(pred))
    y_test_str = strip_padding(devectorize_example(target))
    x_test_str = strip_padding(devectorize_example(source))
    # A prediction only counts when the whole output sequence matches.
    is_correct = pred_str == y_test_str
    full_seq_acc += 1/len(preds) * int(is_correct)
    col = 'green' if is_correct else 'red'
    outstring = 'Input: {}, Out: {}, Pred: {}'.format(x_test_str, y_test_str, pred_str)
    print(colored(outstring, col))
print('\nFull sequence accuracy: {:.3f} %'.format(100 * full_seq_acc))

[32mInput: 69+28, Out: 97, Pred: 97[0m
[32mInput: 39+7, Out: 46, Pred: 46[0m
[32mInput: 94-52, Out: 42, Pred: 42[0m
[32mInput: 98+94, Out: 192, Pred: 192[0m
[32mInput: 39+77, Out: 116, Pred: 116[0m
[32mInput: 91+27, Out: 118, Pred: 118[0m
[31mInput: 68-98, Out: -30, Pred: -20[0m
[32mInput: 67-62, Out: 5, Pred: 5[0m
[32mInput: 6-45, Out: -39, Pred: -39[0m
[32mInput: 75+84, Out: 159, Pred: 159[0m
[32mInput: 61-16, Out: 45, Pred: 45[0m
[32mInput: 62+3, Out: 65, Pred: 65[0m
[32mInput: 6-35, Out: -29, Pred: -29[0m
[32mInput: 99+89, Out: 188, Pred: 188[0m
[32mInput: 84-35, Out: 49, Pred: 49[0m
[32mInput: 53-89, Out: -36, Pred: -36[0m
[32mInput: 28+73, Out: 101, Pred: 101[0m
[32mInput: 13+28, Out: 41, Pred: 41[0m
[32mInput: 76+69, Out: 145, Pred: 145[0m
[32mInput: 50-26, Out: 24, Pred: 24[0m
[32mInput: 38-77, Out: -39, Pred: -39[0m
[32mInput: 44+45, Out: 89, Pred: 89[0m
[32mInput: 67+61, Out: 128, Pred: 128[0m
[32mInput: 94-95, Out: -1, Pred: -1