Making My Model Learn Addition

In [1]:
# Introduction

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed,Dense,Dropout,SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

In [2]:
#generate data

all_chars = '0123456789+'

# convert into one hot encoding vectors and the dimension of the vector will be equal to the length of the string(all_chars)
num_features = len(all_chars)
print(num_features)

11


In [3]:
# create a dict to tokenize the character into indexes
# dict with character values as keys and indexes as values
char_to_index = dict((c,i) for i,c in enumerate(all_chars))
index_to_char = dict((i,c) for i,c in enumerate(all_chars))

In [4]:
# generate a simple data

def generate_data():
  first = np.random.randint(0,100)
  second = np.random.randint(0,100)

  example = str(first) + '+' + str(second)
  label = str(first+ second)

  return example, label

generate_data()

('95+52', '147')

In [16]:
# create the model
# rnn helps us to input varied lengths and output varied lengths
# our model has 2 simple RNNs one is encoder and the other is the decoder.
# rnn models are really difficult to make using manual code but keras does easy for us. Put all the steps into the Sequential class.

hidden_units = 128
max_time_steps = 5

model = Sequential([
                    SimpleRNN(hidden_units, input_shape = (None, num_features)),
                    RepeatVector(max_time_steps), # to vectorize the output
                    SimpleRNN(hidden_units, return_sequences=True),
                    # now we want a dense layer with a softmax activation fn because we need the sequence as the output but we will embed the dense
                    # layer here with a time distributed layer cause we need to predict for each step the probability scores of each character in the sequence
                    TimeDistributed(Dense(num_features, activation='softmax'))
])

model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy']
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 128)               17920     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 5, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


In [6]:
# vectorize and devectorize our data
def vectorize_data(example, label):
  x = np.zeros((max_time_steps, num_features))
  y = np.zeros((max_time_steps, num_features))

  diff_x = max_time_steps - len(example)
  diff_y = max_time_steps - len(label)

  for i,c in enumerate(example):
    x[i+diff_x, char_to_index[c]] = 1
  
  for i in range(diff_x):
    x[i, char_to_index['0']] = 1

    
  for i,c in enumerate(label):
    y[i+diff_y, char_to_index[c]] = 1
  
  for i in range(diff_y):
    y[i, char_to_index['0']] = 1

  return x,y

e,l = generate_data()
print(e,l)

x,y = vectorize_data(e,l)
print(x.shape, y.shape)

45+11 56
(5, 11) (5, 11)


In [8]:
def devectorize_example(example):
  result = [index_to_char[np.argmax(vec)] for i,vec in enumerate(example)]
  return ''.join(result)

devectorize_example(x)

'45+11'

In [9]:
devectorize_example(y)

'00056'

In [35]:
# create a dataset to train our model

def create_dataset(num_examples = 2000):
  x = np.zeros((num_examples, max_time_steps, num_features))
  y = np.zeros((num_examples, max_time_steps, num_features))

  for i in range(num_examples):
    e,l = generate_data()
    e_v, l_v = vectorize_data(e,l)
    x[i] = e_v
    y[i] = l_v

  return x,y

x,y = create_dataset()
print(x.shape, y.shape)
#print(x[1])

(2000, 5, 11) (2000, 5, 11)
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]


In [12]:
devectorize_example(x[0])

'21+71'

In [13]:
devectorize_example(y[0])

'00092'

In [36]:
#training the model
l_cb = LambdaCallback(
    on_epoch_end = lambda e,l: print('{:.2f}'.format(l['val_accuracy']), end = '_')
)

es_cb = EarlyStopping(
    monitor = 'val_loss',
    patience = 10
)
model.fit(x,y, epochs = 500, batch_size = 256, validation_split=0.2, verbose=False, callbacks=[es_cb, l_cb])


0.58_0.60_0.61_0.62_0.63_0.63_0.63_0.64_0.66_0.66_0.67_0.68_0.68_0.70_0.69_0.69_0.71_0.71_0.73_0.72_0.73_0.73_0.74_0.74_0.76_0.72_0.75_0.76_0.75_0.77_0.76_0.76_0.76_0.76_0.77_0.77_0.77_0.77_0.76_0.77_0.78_0.78_0.79_0.79_0.79_0.80_0.80_0.81_0.81_0.81_0.81_0.82_0.82_0.83_0.83_0.83_0.84_0.84_0.85_0.84_0.85_0.85_0.86_0.87_0.86_0.87_0.87_0.86_0.87_0.88_0.87_0.88_0.88_0.87_0.89_0.89_0.88_0.89_0.90_0.89_0.90_0.89_0.90_0.90_0.90_0.89_0.90_0.90_0.91_0.91_0.91_0.91_0.91_0.91_0.91_0.91_0.91_0.91_0.92_0.92_0.92_0.92_0.92_0.92_0.92_0.92_0.93_0.93_0.93_0.93_0.93_0.93_0.93_0.93_0.92_0.92_0.92_0.93_0.93_0.93_0.94_0.94_0.94_0.93_0.94_0.94_0.93_0.94_0.94_0.93_0.94_0.94_0.94_0.94_0.93_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.95_0.94_0.94_0.95_0.94_0.94_0.94_0.94_0.94_0.94_0.94_0.95_0.94_0.94_0.94_0.94_0.95_0.94_0.95_0.94_0.94_0.95_0.94_0.94_

<tensorflow.python.keras.callbacks.History at 0x7f517db9feb8>

In [37]:
x_test, y_test = create_dataset(10)

preds = model.predict(x_test)

for i, pred in enumerate(preds):
  y = devectorize_example(y_test[i])
  y_hat = devectorize_example(pred)

  col = 'green'
  if y != y_hat:
    color = 'red'
  
  out = 'input:' + devectorize_example(x_test[i]) + ' out:' + y + ' pred:' + y_hat
  print(colored(out,col))


[32minput:001+6 out:00007 pred:00007[0m
[32minput:010+3 out:00013 pred:00014[0m
[32minput:33+29 out:00062 pred:00062[0m
[32minput:14+48 out:00062 pred:00061[0m
[32minput:57+54 out:00111 pred:00111[0m
[32minput:074+8 out:00082 pred:00082[0m
[32minput:68+34 out:00102 pred:00102[0m
[32minput:98+69 out:00167 pred:00166[0m
[32minput:042+8 out:00050 pred:00041[0m
[32minput:76+16 out:00092 pred:00092[0m
