In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class colors:
    """ANSI escape sequences used to colour the pass/fail marks printed next to predictions."""
    ok = '\033[92m'     # emitted before the mark when the guess matches the expected answer
    fail = '\033[91m'   # emitted before the mark when the guess is wrong
    close = '\033[0m'   # emitted after the mark to reset the terminal attributes

In [20]:
# Experiment configuration; every later cell reads these names.
TRAINING_SIZE = 50000  # number of unique addition questions to generate
DIGITS = 3  # maximum number of digits in each operand
REVERSE = False  # when True, question strings are reversed before being stored
MAXLEN = DIGITS + 1 + DIGITS  # longest question: DIGITS chars + '+' + DIGITS chars
chars = '0123456789+ '  # alphabet: digits, the plus sign, and the space used as padding
RNN = layers.LSTM  # recurrent layer class used when the model is built
HIDDEN_SIZE = 128  # number of units in each recurrent layer
BATCH_SIZE = 128  # batch size passed to model.fit
LAYERS = 1  # number of recurrent layers stacked after the RepeatVector

In [21]:
class CharacterTable(object):
    """Translate between strings and one-hot matrices over a fixed alphabet.

    The alphabet is the sorted list of distinct characters in ``chars``; a
    character's position in that list is its one-hot column index.
    """

    def __init__(self, chars):
        """Build the character <-> index lookup tables from ``chars``."""
        alphabet = sorted(set(chars))
        self.chars = alphabet
        self.char_indices = {ch: idx for idx, ch in enumerate(alphabet)}
        self.indices_char = {idx: ch for idx, ch in enumerate(alphabet)}

    def encode(self, C, num_rows):
        """Return a ``(num_rows, len(self.chars))`` one-hot array for string ``C``.

        Row ``k`` has a single 1 in the column of ``C[k]``; rows past the end
        of ``C`` are left as all zeros.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            column = self.char_indices[ch]
            onehot[row, column] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn ``x`` back into a string.

        With ``calc_argmax`` true, ``x`` is reduced with ``argmax`` over its
        last axis first; otherwise ``x`` is iterated directly as a sequence
        of character indices.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[idx] for idx in indices)

In [22]:
# Shared encoder/decoder built over the configured alphabet `chars`.
ctable = CharacterTable(chars)

In [23]:
# Display the index -> character mapping (space sorts first, then '+', then the digits).
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

In [24]:
def _random_operand():
    """Return a random non-negative integer written with 1..DIGITS random digits.

    Leading zeros are dropped by ``int``, so e.g. the digits '007' yield 7.
    """
    # The digit count is drawn first, then one np.random.choice per digit.
    num_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(num_digits)))


# Operands range over 0 .. 10**DIGITS - 1 and (a, b) is treated the same as
# (b, a), so only this many distinct questions exist. Asking for more would
# make the loop below spin forever.
_num_operand_values = 10 ** DIGITS
_max_unique_questions = _num_operand_values * (_num_operand_values + 1) // 2
if TRAINING_SIZE > _max_unique_questions:
    raise ValueError(
        'TRAINING_SIZE ({}) exceeds the {} unique questions available with DIGITS={}'.format(
            TRAINING_SIZE, _max_unique_questions, DIGITS))

questions = []  # space-padded 'a+b' strings, each MAXLEN characters long
expected = []   # space-padded sums, each DIGITS + 1 characters long
seen = set()    # order-independent (a, b) pairs already emitted
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Skip any addition question we've already seen, and also any such that
    # x+y == y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the question with spaces so that it is always MAXLEN long.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # The sum of two DIGITS-digit numbers has at most DIGITS + 1 digits.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reversing moves the padding to the front of the question string.
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 50000


In [30]:
# Peek at the first five padded questions and their padded answers.
print(questions[:5], expected[:5])

['7+9    ', '8+26   ', '94+89  ', '33+884 ', '34+184 '] ['16  ', '34  ', '183 ', '917 ', '218 ']


In [25]:
print('Vectorization...')
# One-hot encode every question and answer into boolean tensors of shape
# (samples, sequence length, alphabet size).
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [26]:
# Shuffle x and y with one shared permutation so question/answer pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the final 10% of the shuffled samples for validation.
split_at = len(x) - len(x) // 10
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

# Report the shapes of both partitions.
for title, x_part, y_part in (('Training Data:', x_train, y_train),
                              ('Validation Data:', x_val, y_val)):
    print(title)
    print(x_part.shape)
    print(y_part.shape)

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [27]:
# Dump the raw one-hot arrays of the first ten training samples.
# NOTE(review): this prints every boolean cell and is very verbose; printing
# shapes or decoding a few rows with ctable.decode would be easier to read.
print(x_train[:10], y_train[:10])

[[[False False False False  True False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False False False False  True False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False  True False False
   False]
  [False False False False False False  True False False False False
   False]
  [False False False False False False False  True False False False
   False]]

 [[False False False False False False False False  True False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False  True False False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False False False

In [28]:
print('Build model...')
model = Sequential()
# Encoder: reads the (MAXLEN, len(chars)) one-hot question and, because
# return_sequences is not set, emits a single HIDDEN_SIZE-wide vector.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# Repeat that vector once per output position (DIGITS + 1 steps) to feed the decoder.
model.add(layers.RepeatVector(DIGITS + 1))
# Decoder: LAYERS stacked recurrent layers; return_sequences=True keeps one
# output per time step instead of only the last one.
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply the same dense layer at every time step, producing one score per character.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
# Softmax turns the per-step scores into a distribution over the alphabet.
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_2 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [29]:
NUM_ITERATIONS = 40          # number of single-epoch training rounds
SAMPLES_PER_ITERATION = 10   # validation samples printed after each round

# Train one epoch at a time and, after each epoch, print predictions for a few
# random validation samples so that progress can be inspected by eye.
for iteration in range(NUM_ITERATIONS):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(SAMPLES_PER_ITERATION):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Sequential.predict_classes was removed from Keras (TF 2.6); taking the
        # argmax of predict() over the class axis yields the same class indices.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so decode must not argmax again.
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 104+12  T 116  [91m☒[0m 135 
Q 332+23  T 355  [91m☒[0m 333 
Q 327+75  T 402  [91m☒[0m 138 
Q 39+92   T 131  [91m☒[0m 195 
Q 13+680  T 693  [91m☒[0m 138 
Q 100+82  T 182  [91m☒[0m 198 
Q 856+84  T 940  [91m☒[0m 108 
Q 49+432  T 481  [91m☒[0m 104 
Q 88+95   T 183  [91m☒[0m 108 
Q 599+995 T 1594 [91m☒[0m 1198

--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 432+8   T 440  [91m☒[0m 33  
Q 474+81  T 555  [91m☒[0m 500 
Q 822+307 T 1129 [91m☒[0m 138 
Q 909+196 T 1105 [91m☒[0m 1108
Q 844+811 T 1655 [91m☒[0m 1408
Q 40+386  T 426  [91m☒[0m 408 
Q 85+960  T 1045 [91m☒[0m 107 
Q 707+31  T 738  [91m☒[0m 708 
Q 77+800  T 877  [91m☒[0m 888 
Q 735+482 T 1217 [91m☒[0m 1108

--------------------------------------------------
Iteration 2
Train on 45000 samples, valida

Q 847+3   T 850  [92m☑[0m 850 
Q 772+19  T 791  [92m☑[0m 791 
Q 98+285  T 383  [92m☑[0m 383 
Q 455+49  T 504  [92m☑[0m 504 
Q 58+912  T 970  [92m☑[0m 970 
Q 70+170  T 240  [92m☑[0m 240 
Q 549+31  T 580  [91m☒[0m 570 
Q 40+28   T 68   [92m☑[0m 68  
Q 594+404 T 998  [92m☑[0m 998 
Q 383+84  T 467  [92m☑[0m 467 

--------------------------------------------------
Iteration 15
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 844+94  T 938  [92m☑[0m 938 
Q 715+566 T 1281 [91m☒[0m 1371
Q 26+156  T 182  [92m☑[0m 182 
Q 298+32  T 330  [91m☒[0m 320 
Q 932+39  T 971  [91m☒[0m 981 
Q 28+672  T 700  [91m☒[0m 690 
Q 97+423  T 520  [91m☒[0m 510 
Q 8+367   T 375  [92m☑[0m 375 
Q 39+230  T 269  [92m☑[0m 269 
Q 915+0   T 915  [92m☑[0m 915 

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 4+479   T 483  [92m☑[0m 483 
Q 854+660 T 1514 [91m☒[0m 1524
Q 79+509  T 588  [92m☑[0

Q 536+902 T 1438 [92m☑[0m 1438
Q 882+8   T 890  [92m☑[0m 890 
Q 8+254   T 262  [92m☑[0m 262 
Q 12+650  T 662  [92m☑[0m 662 
Q 557+6   T 563  [92m☑[0m 563 
Q 955+79  T 1034 [92m☑[0m 1034
Q 673+0   T 673  [92m☑[0m 673 
Q 785+19  T 804  [92m☑[0m 804 
Q 928+885 T 1813 [92m☑[0m 1813
Q 19+444  T 463  [92m☑[0m 463 

--------------------------------------------------
Iteration 29
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 664+15  T 679  [92m☑[0m 679 
Q 15+722  T 737  [92m☑[0m 737 
Q 361+46  T 407  [92m☑[0m 407 
Q 210+362 T 572  [92m☑[0m 572 
Q 94+809  T 903  [92m☑[0m 903 
Q 725+5   T 730  [92m☑[0m 730 
Q 729+822 T 1551 [92m☑[0m 1551
Q 5+179   T 184  [92m☑[0m 184 
Q 725+76  T 801  [92m☑[0m 801 
Q 334+53  T 387  [92m☑[0m 387 

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 427+22  T 449  [92m☑[0m 449 
Q 107+954 T 1061 [92m☑[0m 1061
Q 750+908 T 1658 [91m☒[0

In [36]:
print("MSG : Prediction")
# Hand-written questions (padded to MAXLEN) and their answers (padded to DIGITS + 1).
test_x = ["555+275", "860+7  ", "340+29 "]
test_y = ["830 ", "867 ", "369 "]
# NOTE: this rebinds x and y, which earlier cells used for the full dataset.
# The builtin `bool` replaces the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24.
x = np.zeros((len(test_x), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(test_y), DIGITS + 1, len(chars)), dtype=bool)
for j, (question, answer) in enumerate(zip(test_x, test_y)):
    if REVERSE:
        # Training questions are reversed when REVERSE is set, so the
        # hand-written ones must be reversed too before they reach the model.
        question = question[::-1]
    x[j] = ctable.encode(question, MAXLEN)
    y[j] = ctable.encode(answer, DIGITS + 1)

MSG : Prediction


In [37]:
# x and y are already NumPy arrays, so no conversion is needed before predicting.
# Sequential.predict_classes was removed from Keras (TF 2.6); taking the argmax
# of predict() over the class axis yields the same class indices.
preds = model.predict(x, verbose=0).argmax(axis=-1)
for i in range(len(preds)):
    q = ctable.decode(x[i])
    correct = ctable.decode(y[i])
    # preds already holds class indices, so decode must not argmax again.
    guess = ctable.decode(preds[i], calc_argmax=False)
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)

Q 555+275 T 830  [92m☑[0m 830 
Q 860+7   T 867  [92m☑[0m 867 
Q 340+29  T 369  [92m☑[0m 369 
