### Making Your Model Learn Addition!

Given the string "54+7", the model should return a prediction: "61".

In [None]:
#importing the required libraries
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

In [None]:
#vocabulary: the ten digits plus '+', i.e. every character used in examples and labels
all_chars = '0123456789+'

In [None]:
#width of a one-hot vector: one slot per character of the vocabulary
num_features = len(all_chars)

#character -> integer index, used when encoding text
char_to_index = {char: position for position, char in enumerate(all_chars)}
#integer index -> character, the reverse lookup used when decoding
index_to_char = dict(enumerate(all_chars))

print('Number of features:', num_features)

Number of features: 11


In [None]:
#function to create a single example and label pair
def generate_data():
    """Draw one random addition problem as text.

    Two integers are sampled independently from the range [0, 100). The
    example is the text 'a+b' and the label is the decimal text of a + b.

    Returns:
        tuple of (example, label) strings, e.g. ('93+31', '124').
    """
    #two separate draws, left operand first
    operands = [np.random.randint(0, 100) for _ in range(2)]
    example = '+'.join(map(str, operands))
    label = str(sum(operands))
    return example, label
#quick check: draw one random (example, label) pair to see the text format
generate_data()

('93+31', '124')

To understand a review properly, the position of each word in the sentence matters as much as the word itself.

Consider these two reviews:

Review 1: This movie is not terrible at all.

Review 2: This movie is pretty decent.

In [None]:
#creation of simple RNN model
hidden_units = 128
#maximum length of the input expression (e.g. '99+99' has 5 characters)
max_time_steps = 5

#architecture, built layer by layer:
#  1. encoder: a SimpleRNN that reads the one-hot input sequence and returns a single vector
#  2. RepeatVector: copies that vector max_time_steps times so the decoder gets one input per output step
#  3. decoder: a second SimpleRNN that returns its output at every time step
#  4. TimeDistributed(Dense): applies the same softmax layer to every time step to pick a character
model = Sequential()
model.add(SimpleRNN(hidden_units, input_shape=(None, num_features)))
model.add(RepeatVector(max_time_steps))
model.add(SimpleRNN(hidden_units, return_sequences=True))
model.add(TimeDistributed(Dense(num_features, activation='softmax')))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               17920     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


In [None]:
#Function to vectorize the examples and labels
def vectorize_example(example, label):
    """One-hot encode an expression and its label into fixed-size arrays.

    Each sequence is right-aligned in an array of shape
    (max_time_steps, num_features). The unused leading rows are filled with
    the one-hot vector for '0', so '5+7' is encoded like '005+7' and '89'
    like '00089'.

    Args:
        example: expression text such as '29+60'; every character must be a
            key of char_to_index.
        label: text of the expected sum, such as '89'.

    Returns:
        (x, y): the encoded example and the encoded label.

    Raises:
        ValueError: if example or label is longer than max_time_steps.
        KeyError: if a character is not present in char_to_index.
    """
    def _one_hot_left_padded(text, what):
        #number of leading rows that have to be filled with the '0' padding
        pad = max_time_steps - len(text)
        if pad < 0:
            #a negative offset would wrap around through numpy's negative
            #indexing and silently write two characters into the same row
            raise ValueError(
                '{} {!r} has {} characters, more than max_time_steps={}'.format(
                    what, text, len(text), max_time_steps))
        encoded = np.zeros((max_time_steps, num_features))
        #padding at the beginning
        encoded[:pad, char_to_index['0']] = 1
        #one hot encoding of the actual characters
        for offset, char in enumerate(text):
            encoded[pad + offset, char_to_index[char]] = 1
        return encoded

    return _one_hot_left_padded(example, 'example'), _one_hot_left_padded(label, 'label')

#to check the vectorizer function on one freshly generated pair
e, l = generate_data()
print('Text Example and Label:', e, l)
x, y = vectorize_example(e, l)
#shape of the vectorized example and label
print('Vectorized Example and Label Shapes:', x.shape, y.shape)

Text Example and Label: 29+60 89
Vectorized Example and Label Shapes: (5, 11) (5, 11)


In [None]:
#devectorization of the examples, that is reverse of the above process
def devectorize_example(example):
    """Decode a sequence of one-hot (or probability) rows back into text.

    For every row the position of the largest value is looked up in
    index_to_char, and the resulting characters are concatenated in order.
    """
    chars = []
    for vec in example:
        chars.append(index_to_char[np.argmax(vec)])
    return ''.join(chars)
#devectorize_example uses the index_to_char dictionary to get back the original text
#checking the function on the vectorized example
devectorize_example(x)

'29+60'

In [None]:
devectorize_example(y)
#checking the function on the label
#the decoded label keeps its leading '0' padding, which does not change its numeric value

'00089'

In [None]:
#function to create the dataset
def create_dataset(num_examples=2000):
    """Generate num_examples random problems and return them vectorized.

    Returns:
        (x_train, y_train): two arrays of shape
        (num_examples, max_time_steps, num_features) holding the encoded
        examples and their encoded labels, row for row.
    """
    #examples and labels share the same shape, so allocate both from it
    dataset_shape = (num_examples, max_time_steps, num_features)
    x_train, y_train = np.zeros(dataset_shape), np.zeros(dataset_shape)

    for row in range(num_examples):
        #generate one pair and store its vectorized form in this row
        x_train[row], y_train[row] = vectorize_example(*generate_data())

    return x_train, y_train

#build the dataset with the default number of examples and check the array shapes
x_train, y_train = create_dataset()
print(x_train.shape, y_train.shape)

(2000, 5, 11) (2000, 5, 11)


In [None]:
#decode the first training example back to text as a sanity check
devectorize_example(x_train[0])

'61+89'

In [None]:
#decode the corresponding label (it is shown with its leading '0' padding)
devectorize_example(y_train[0])

'00150'

In [None]:
#training of the model
#callback that prints only the validation accuracy after every epoch
def _print_val_accuracy(epoch, logs):
    """Print the epoch's validation accuracy, whichever key it was logged under."""
    logs = logs or {}
    #metrics=['accuracy'] is logged as 'val_accuracy' by TensorFlow 2.x
    #but as 'val_acc' by older releases, so accept both instead of raising KeyError
    val_accuracy = logs.get('val_accuracy', logs.get('val_acc'))
    if val_accuracy is not None:
        print('{:.2f}'.format(val_accuracy), end = ' _ ')

l_cb = LambdaCallback(on_epoch_end = _print_val_accuracy)
#early stopping callback: stop once val_loss has not improved for 10 epochs in a row
es_cb = EarlyStopping(monitor = 'val_loss', patience = 10)
#fit without the built-in progress output; l_cb reports progress instead
model.fit(x_train, y_train, epochs =500, batch_size = 256, validation_split = 0.2,
         verbose = 0, callbacks = [es_cb, l_cb])

0.55 _ 0.60 _ 0.62 _ 0.62 _ 0.62 _ 0.62 _ 0.63 _ 0.63 _ 0.63 _ 0.64 _ 0.64 _ 0.65 _ 0.65 _ 0.65 _ 0.65 _ 0.66 _ 0.66 _ 0.66 _ 0.68 _ 0.67 _ 0.68 _ 0.68 _ 0.70 _ 0.70 _ 0.70 _ 0.71 _ 0.71 _ 0.71 _ 0.72 _ 0.71 _ 0.73 _ 0.73 _ 0.73 _ 0.73 _ 0.74 _ 0.74 _ 0.75 _ 0.75 _ 0.74 _ 0.75 _ 0.76 _ 0.76 _ 0.76 _ 0.75 _ 0.76 _ 0.77 _ 0.77 _ 0.78 _ 0.78 _ 0.78 _ 0.79 _ 0.79 _ 0.80 _ 0.80 _ 0.80 _ 0.80 _ 0.81 _ 0.81 _ 0.82 _ 0.83 _ 0.82 _ 0.83 _ 0.83 _ 0.83 _ 0.83 _ 0.84 _ 0.85 _ 0.85 _ 0.86 _ 0.85 _ 0.86 _ 0.87 _ 0.87 _ 0.87 _ 0.88 _ 0.88 _ 0.88 _ 0.88 _ 0.89 _ 0.88 _ 0.89 _ 0.88 _ 0.89 _ 0.89 _ 0.89 _ 0.90 _ 0.89 _ 0.90 _ 0.90 _ 0.91 _ 0.91 _ 0.91 _ 0.91 _ 0.92 _ 0.91 _ 0.91 _ 0.92 _ 0.91 _ 0.92 _ 0.92 _ 0.92 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.92 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.94 _ 0.93 _ 0.93 _ 0.94 _ 0.93 _ 0.93 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.95 _ 0.94 _ 0.95 _ 0.95 _ 0.94 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _

<tensorflow.python.keras.callbacks.History at 0x26f6c9dbc50>

In [None]:
#evaluating the performance of the model on 10 freshly generated examples
x_test, y_test = create_dataset(10)
preds = model.predict(x_test)

#the decoded strings get their own names so that the arrays `x` and `y`
#created earlier in the notebook are not overwritten by this cell
for features, target, pred in zip(x_test, y_test, preds):
    expected = devectorize_example(target)
    predicted = devectorize_example(pred)
    #blue marks an exact match, red marks a wrong prediction
    col = 'blue' if expected == predicted else 'red'
    out = 'Input: '+ devectorize_example(features) + ' Out: '+expected + ' Pred: ' + predicted
    print(colored(out, col))


[34mInput: 86+52 Out: 00138 Pred: 00138[0m
[34mInput: 48+30 Out: 00078 Pred: 00078[0m
[34mInput: 60+26 Out: 00086 Pred: 00086[0m
[34mInput: 55+93 Out: 00148 Pred: 00148[0m
[34mInput: 46+49 Out: 00095 Pred: 00095[0m
[34mInput: 66+77 Out: 00143 Pred: 00143[0m
[34mInput: 81+63 Out: 00144 Pred: 00144[0m
[31mInput: 52+37 Out: 00089 Pred: 00099[0m
[34mInput: 24+31 Out: 00055 Pred: 00055[0m
[34mInput: 70+45 Out: 00115 Pred: 00115[0m
