In [1]:
import numpy as np
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, RNN
from keras import layers

Using TensorFlow backend.


In [2]:
class colors:
    """ANSI terminal escape sequences used to colour the marks printed by the training loop."""
    # Bright-green foreground: prefixes the mark printed when the guess equals the target.
    ok = '\033[92m'
    # Bright-red foreground: prefixes the mark printed when the guess differs from the target.
    fail = '\033[91m'
    # Reset sequence: appended after a mark so the following text is not coloured.
    close = '\033[0m'

In [3]:
# Number of unique addition questions produced by the data-generation cell.
TRAINING_SIZE = 80000
# Maximum number of digits drawn for each operand.
DIGITS = 3
# When True, the padded query string is reversed before it is stored.
REVERSE = False
# Longest possible query: DIGITS digits, one '+', DIGITS digits.
MAXLEN = DIGITS + 1 + DIGITS
# Alphabet for the one-hot encoding: the ten digits, '+', and the space used as padding.
chars = '0123456789+ '
# NOTE(review): this rebinds the `RNN` name imported from keras.layers in the import cell,
# and the model cell builds its recurrent layers with LSTM directly rather than via this alias.
RNN = layers.LSTM
# Number of units in each recurrent layer of the model.
HIDDEN_SIZE = 128
# NOTE(review): the training cell passes batch_size=100 to fit() instead of this constant.
BATCH_SIZE = 128
# NOTE(review): not referenced by the visible model cell, which stacks a fixed set of layers.
LAYERS = 1

In [4]:
class CharacterTable:
    """Bidirectional mapping between characters and one-hot rows.

    The alphabet is the sorted set of distinct characters passed to the
    constructor; a character's position in that sorted list is its index.
    """

    def __init__(self, chars):
        """Build the lookup tables.

        Args:
            chars: iterable of characters; duplicates are dropped.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: pos for pos, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode the string ``C``.

        Args:
            C: string whose characters all belong to the alphabet.
            num_rows: number of rows of the result; rows beyond ``len(C)``
                stay all-zero.

        Returns:
            Array of shape ``(num_rows, len(self.chars))`` with a single 1
            in each of the first ``len(C)`` rows.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, char in enumerate(C):
            column = self.char_indices[char]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn one-hot rows (or character indices) back into a string.

        Args:
            x: array of one-hot/probability rows, or an iterable of
                character indices when ``calc_argmax`` is False.
            calc_argmax: when True, take the argmax over the last axis of
                ``x`` to obtain the indices first.

        Returns:
            The decoded string.
        """
        positions = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[pos] for pos in positions)

In [5]:
# Shared encoder/decoder between characters of `chars` and one-hot rows.
ctable = CharacterTable(chars)

In [6]:
def random_operand():
    """Return a random non-negative integer built from 1 to DIGITS decimal digits.

    Leading zeros collapse when the digit string is converted with int(),
    so the returned value may have fewer digits than were drawn.
    """
    n_digits = np.random.randint(1, DIGITS + 1)  # upper bound is exclusive
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


# Parallel lists: questions_with_plus[i] is answered by expected_with_plus[i].
questions_with_plus = []
expected_with_plus = []
# Operand pairs already emitted, stored sorted so that a+b and b+a count as the same question.
seen_with_plus = set()
print('Generating data...')
while len(questions_with_plus) < TRAINING_SIZE:
    a, b = random_operand(), random_operand()
    key = tuple(sorted((a, b)))
    if key in seen_with_plus:
        continue
    seen_with_plus.add(key)
    # Right-pad the question with spaces to the fixed length MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    # The sum of two DIGITS-digit numbers has at most DIGITS + 1 digits; pad to that width.
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions_with_plus.append(query)
    expected_with_plus.append(ans)
print('Total addition questions:', len(questions_with_plus))

Generating data...
Total addition questions: 80000


# Processing
## Transfer data to a one-hot representation

In [7]:
# Show the first five padded questions followed by their padded answers.
print(questions_with_plus[:5], expected_with_plus[:5])

['8+36   ', '49+2   ', '9+16   ', '9+918  ', '592+5  '] ['44  ', '51  ', '25  ', '927 ', '597 ']


In [8]:
# Bind the generated lists to the shorter names read by the vectorization cell.
questions = questions_with_plus
expected = expected_with_plus

## Vectorize the data, then split it into training, validation, and testing sets

In [9]:
print('Vectorization...')
# One-hot tensors: x holds one (MAXLEN, len(chars)) matrix per question and
# y holds one (DIGITS + 1, len(chars)) matrix per answer.
# The builtin `bool` is used as the dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
# encode() returns a float 0/1 matrix; assigning it into the bool arrays casts it.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [10]:
# Shuffle questions and answers with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# The first 20000 shuffled samples are used for training/validation,
# everything after them is held out for testing.
train_x, test_x = x[:20000], x[20000:]
train_y, test_y = y[:20000], y[20000:]

# Keep the last tenth of the training portion aside as the validation set.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of every split: inputs first, then targets.
for split_title, split_x, split_y in (
    ('Training Data:', x_train, y_train),
    ('Validation Data:', x_val, y_val),
    ('Testing Data:', test_x, test_y),
):
    print(split_title)
    print(split_x.shape)
    print(split_y.shape)

Training Data:
(18000, 7, 12)
(18000, 4, 12)
Validation Data:
(2000, 7, 12)
(2000, 4, 12)
Testing Data:
(60000, 7, 12)
(60000, 4, 12)


In [11]:
# print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False  True False False False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False False  True False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False False  True False False False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False  True False False False False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False  True Fal

## Build Model

In [17]:
# Encoder-decoder network for character-level addition:
#   1. an LSTM reads the MAXLEN one-hot input characters and returns only its final output,
#   2. RepeatVector copies that vector once per answer position (DIGITS + 1 times),
#   3. a second LSTM returns an output for every answer position, and
#   4. a per-position softmax over the len(chars) characters selects the answer character.
# Every layer is taken from the `layers` module imported in the first cell, so
# no wildcard import is needed here (a wildcard import would also rebind
# notebook-level names such as `RNN`).
model = Sequential()
model.add(layers.LSTM(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
model.add(layers.RepeatVector(DIGITS + 1))
model.add(layers.LSTM(HIDDEN_SIZE, return_sequences=True))
model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_5 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Names used by this training loop and by the evaluation cell.
trainData, trainTarget = x_train, y_train
testData, testTarget = test_x, test_y

print("~~~")
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train for a single epoch per pass so sample predictions can be printed in between.
    model.fit(
        trainData,
        trainTarget,
        batch_size=100,
        epochs=1,
        shuffle=True,
        verbose=2,
        validation_data=(x_val, y_val),
    )
    # Spot-check ten randomly chosen test questions against the current model.
    for _ in range(10):
        ind = np.random.randint(0, len(testData))
        # Slices of length one keep the leading batch axis that predict() expects.
        rowx = testData[ind:ind + 1]
        rowy = testTarget[ind:ind + 1]
        preds = model.predict(rowx)[0]
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds)
        if correct == guess:
            mark = colors.ok + '☑' + colors.close
        else:
            mark = colors.fail + '☒' + colors.close
        # Undo the input reversal for display when REVERSE is enabled.
        shown_question = q[::-1] if REVERSE else q
        print('Q', shown_question, 'T', correct, mark, guess)

~~~

--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 254s - loss: 1.9902 - acc: 0.2948 - val_loss: 1.8717 - val_acc: 0.3241
Q 543+19  T 562  [91m☒[0m 137 
Q 86+466  T 552  [91m☒[0m 107 
Q 99+647  T 746  [91m☒[0m 104 
Q 668+182 T 850  [91m☒[0m 110 
Q 20+656  T 676  [91m☒[0m 137 
Q 875+32  T 907  [91m☒[0m 137 
Q 610+528 T 1138 [91m☒[0m 100 
Q 6+43    T 49   [91m☒[0m 13  
Q 48+7    T 55   [91m☒[0m 13  
Q 44+928  T 972  [91m☒[0m 137 

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 249s - loss: 1.8356 - acc: 0.3310 - val_loss: 1.8139 - val_acc: 0.3329
Q 937+66  T 1003 [91m☒[0m 104 
Q 46+993  T 1039 [91m☒[0m 104 
Q 36+639  T 675  [91m☒[0m 104 
Q 3+878   T 881  [91m☒[0m 444 
Q 665+119 T 784  [91m☒[0m 104 
Q 625+28  T 653  [91m☒[0m 434 
Q 187+40  T 227  [91m☒[0m 104 
Q 27+598  T 625  [91m☒[0m 104 
Q 24+185

Q 701+1   T 702  [91m☒[0m 802 
Q 6+564   T 570  [91m☒[0m 561 
Q 613+57  T 670  [91m☒[0m 685 
Q 70+181  T 251  [91m☒[0m 265 
Q 178+12  T 190  [91m☒[0m 109 
Q 212+0   T 212  [91m☒[0m 211 
Q 54+330  T 384  [91m☒[0m 379 

--------------------------------------------------
Iteration 16
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 303s - loss: 1.0361 - acc: 0.6147 - val_loss: 1.0540 - val_acc: 0.5984
Q 45+603  T 648  [91m☒[0m 655 
Q 4+82    T 86   [91m☒[0m 94  
Q 32+720  T 752  [91m☒[0m 741 
Q 986+403 T 1389 [91m☒[0m 1365
Q 175+242 T 417  [91m☒[0m 421 
Q 321+791 T 1112 [91m☒[0m 1115
Q 274+429 T 703  [91m☒[0m 748 
Q 353+283 T 636  [91m☒[0m 618 
Q 371+217 T 588  [91m☒[0m 698 
Q 723+4   T 727  [91m☒[0m 726 

--------------------------------------------------
Iteration 17
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 243s - loss: 0.9919 - acc: 0.6339 - val_loss: 1.0119 - val_acc: 0.6144
Q 411+860 T 1271 [91m☒[0m 1212
Q 5+333   

Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 85s - loss: 0.2533 - acc: 0.9397 - val_loss: 0.3033 - val_acc: 0.9110
Q 654+50  T 704  [92m☑[0m 704 
Q 857+98  T 955  [92m☑[0m 955 
Q 36+474  T 510  [92m☑[0m 510 
Q 282+19  T 301  [91m☒[0m 201 
Q 1+758   T 759  [92m☑[0m 759 
Q 84+240  T 324  [92m☑[0m 324 
Q 635+807 T 1442 [91m☒[0m 1443
Q 10+27   T 37   [92m☑[0m 37  
Q 30+290  T 320  [92m☑[0m 320 
Q 962+38  T 1000 [92m☑[0m 1000

--------------------------------------------------
Iteration 32
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 99s - loss: 0.2233 - acc: 0.9512 - val_loss: 0.2500 - val_acc: 0.9304
Q 28+96   T 124  [92m☑[0m 124 
Q 257+9   T 266  [92m☑[0m 266 
Q 407+710 T 1117 [92m☑[0m 1117
Q 35+356  T 391  [92m☑[0m 391 
Q 267+572 T 839  [92m☑[0m 839 
Q 245+473 T 718  [92m☑[0m 718 
Q 875+375 T 1250 [92m☑[0m 1250
Q 543+836 T 1379 [92m☑[0m 1379
Q 118+392 T 510  [91m☒[0m 519 
Q 26+178  T 204  [92m☑[0m 204 

----------

Q 86+538  T 624  [92m☑[0m 624 
Q 635+3   T 638  [92m☑[0m 638 
Q 50+561  T 611  [92m☑[0m 611 
Q 522+540 T 1062 [92m☑[0m 1062
Q 95+770  T 865  [92m☑[0m 865 

--------------------------------------------------
Iteration 47
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 56s - loss: 0.0684 - acc: 0.9885 - val_loss: 0.1241 - val_acc: 0.9631
Q 536+577 T 1113 [92m☑[0m 1113
Q 910+959 T 1869 [91m☒[0m 1879
Q 381+319 T 700  [91m☒[0m 790 
Q 234+54  T 288  [92m☑[0m 288 
Q 81+949  T 1030 [92m☑[0m 1030
Q 8+498   T 506  [92m☑[0m 506 
Q 750+31  T 781  [92m☑[0m 781 
Q 327+861 T 1188 [92m☑[0m 1188
Q 197+45  T 242  [92m☑[0m 242 
Q 914+227 T 1141 [92m☑[0m 1141

--------------------------------------------------
Iteration 48
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 90s - loss: 0.1050 - acc: 0.9707 - val_loss: 0.0949 - val_acc: 0.9743
Q 300+971 T 1271 [92m☑[0m 1271
Q 360+55  T 415  [92m☑[0m 415 
Q 732+581 T 1313 [92m☑[0m 1313
Q 449+44  T 

 - 99s - loss: 0.0207 - acc: 0.9982 - val_loss: 0.0578 - val_acc: 0.9828
Q 87+931  T 1018 [92m☑[0m 1018
Q 605+366 T 971  [92m☑[0m 971 
Q 86+76   T 162  [92m☑[0m 162 
Q 84+305  T 389  [92m☑[0m 389 
Q 637+35  T 672  [92m☑[0m 672 
Q 0+840   T 840  [92m☑[0m 840 
Q 448+651 T 1099 [92m☑[0m 1099
Q 44+34   T 78   [92m☑[0m 78  
Q 19+589  T 608  [92m☑[0m 608 
Q 955+15  T 970  [92m☑[0m 970 

--------------------------------------------------
Iteration 63
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 105s - loss: 0.0202 - acc: 0.9979 - val_loss: 0.1073 - val_acc: 0.9656
Q 151+419 T 570  [92m☑[0m 570 
Q 55+121  T 176  [92m☑[0m 176 
Q 268+475 T 743  [92m☑[0m 743 
Q 287+300 T 587  [91m☒[0m 588 
Q 74+529  T 603  [92m☑[0m 603 
Q 602+48  T 650  [92m☑[0m 650 
Q 396+0   T 396  [92m☑[0m 396 
Q 83+25   T 108  [92m☑[0m 108 
Q 117+371 T 488  [92m☑[0m 488 
Q 493+5   T 498  [92m☑[0m 498 

--------------------------------------------------
Iteration 64
Trai

Q 22+695  T 717  [92m☑[0m 717 
Q 963+0   T 963  [92m☑[0m 963 
Q 542+583 T 1125 [92m☑[0m 1125
Q 33+471  T 504  [92m☑[0m 504 

--------------------------------------------------
Iteration 78
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 245s - loss: 0.0089 - acc: 0.9995 - val_loss: 0.0446 - val_acc: 0.9864
Q 342+331 T 673  [92m☑[0m 673 
Q 247+307 T 554  [92m☑[0m 554 
Q 50+50   T 100  [91m☒[0m 101 
Q 84+186  T 270  [92m☑[0m 270 
Q 894+66  T 960  [91m☒[0m 950 
Q 374+69  T 443  [92m☑[0m 443 
Q 1+646   T 647  [92m☑[0m 647 
Q 491+774 T 1265 [92m☑[0m 1265
Q 980+96  T 1076 [92m☑[0m 1076
Q 540+2   T 542  [92m☑[0m 542 

--------------------------------------------------
Iteration 79
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 286s - loss: 0.0088 - acc: 0.9995 - val_loss: 0.0412 - val_acc: 0.9869
Q 813+99  T 912  [92m☑[0m 912 
Q 862+76  T 938  [92m☑[0m 938 
Q 3+118   T 121  [92m☑[0m 121 
Q 838+33  T 871  [92m☑[0m 871 
Q 333+14  

Q 403+67  T 470  [92m☑[0m 470 
Q 74+917  T 991  [92m☑[0m 991 
Q 61+249  T 310  [92m☑[0m 310 
Q 972+7   T 979  [92m☑[0m 979 
Q 87+557  T 644  [92m☑[0m 644 
Q 560+43  T 603  [92m☑[0m 603 
Q 357+733 T 1090 [92m☑[0m 1090
Q 534+79  T 613  [92m☑[0m 613 
Q 133+50  T 183  [92m☑[0m 183 
Q 294+501 T 795  [91m☒[0m 895 

--------------------------------------------------
Iteration 94
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 137s - loss: 0.0056 - acc: 0.9999 - val_loss: 0.0375 - val_acc: 0.9875
Q 28+569  T 597  [92m☑[0m 597 
Q 61+846  T 907  [92m☑[0m 907 
Q 53+432  T 485  [92m☑[0m 485 
Q 0+621   T 621  [92m☑[0m 621 
Q 984+792 T 1776 [92m☑[0m 1776
Q 441+164 T 605  [92m☑[0m 605 
Q 945+799 T 1744 [92m☑[0m 1744
Q 15+765  T 780  [92m☑[0m 780 
Q 69+888  T 957  [92m☑[0m 957 
Q 4+224   T 228  [92m☑[0m 228 

--------------------------------------------------
Iteration 95
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
 - 114s - loss: 0.

## Test

In [None]:
# Evaluate the trained model on the held-out test split.
# The model was compiled with a loss and a single 'accuracy' metric, so the
# result unpacks into (loss, accuracy).
score, acc = model.evaluate(testData, testTarget)
print('Test score:', score)
print('Test accuracy:', acc)