In [2]:
# -*- coding: utf-8 -*-
'''
# An implementation of sequence to sequence learning for performing addition

Input: "535+61"
Output: "596"
Padding is handled by using a repeated sentinel character (space)

Input may optionally be reversed, shown to increase performance in many tasks in:
"Learning to Execute"
http://arxiv.org/abs/1410.4615
and
"Sequence to Sequence Learning with Neural Networks"
http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf
Theoretically it introduces shorter term dependencies between source and target.

Two digits reversed:
+ One layer LSTM (128 HN), 5k training examples = 99% train/test accuracy in 55 epochs

Three digits reversed:
+ One layer LSTM (128 HN), 50k training examples = 99% train/test accuracy in 100 epochs

Four digits reversed:
+ One layer LSTM (128 HN), 400k training examples = 99% train/test accuracy in 20 epochs

Five digits reversed:
+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
'''  # noqa

from __future__ import print_function
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


In [3]:
class CharacterTable(object):
    """Two-way mapping between a fixed alphabet and one-hot rows.

    Given a set of characters, this table can:
    + Encode a string as a one-hot matrix (`encode`)
    + Decode one-hot rows or rows of probabilities back to a string
      (`decode`)
    + Decode a vector of integer indices back to a string
      (`decode` with `calc_argmax=False`)
    """
    def __init__(self, chars):
        """Build the character <-> index lookup tables.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the rest is sorted, which fixes each
                character's index.
        """
        alphabet = sorted(set(chars))
        self.chars = alphabet
        self.char_indices = {ch: idx for idx, ch in enumerate(alphabet)}
        self.indices_char = dict(enumerate(alphabet))

    def encode(self, C, num_rows):
        """One-hot encode the string C.

        # Arguments
            C: string, to be encoded.
            num_rows: Number of rows in the returned matrix. Rows beyond
                `len(C)` stay all-zero, so every sample can share one shape.

        # Returns
            Array of shape `(num_rows, len(self.chars))` holding a single 1
            per encoded character.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, char in enumerate(C):
            column = self.char_indices[char]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn model output (or indices) back into a string.

        # Arguments
            x: A vector or a 2D array of probabilities or one-hot
                representations; or a vector of character indices (used
                with `calc_argmax=False`).
            calc_argmax: Whether to take the argmax over the last axis to
                obtain the character indices first, defaults to `True`.

        # Returns
            The decoded characters joined into one string.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[index] for index in indices)

In [4]:
class colors:
    """ANSI terminal escape sequences used to colour the result markers."""
    # Prefix for a correct guess (bright green foreground).
    ok = '\033[92m'
    # Prefix for a wrong guess (bright red foreground).
    fail = '\033[91m'
    # Suffix that resets the terminal attributes after a coloured marker.
    close = '\033[0m'

In [5]:
# Knobs for the dataset and the model.
TRAINING_SIZE = 50000  # how many unique addition questions to generate
DIGITS = 3             # maximum number of digits in each operand
REVERSE = True         # feed the padded query to the network back-to-front

# A query looks like 'int+int' (e.g., '345+678'): two operands of at most
# DIGITS characters each, plus one character for the '+' sign.
MAXLEN = 2 * DIGITS + 1

In [6]:
# Vocabulary: the ten digits, the plus sign, and a space used for padding.
chars = '0123456789+ '
# Lookup table that converts between these characters and one-hot rows.
ctable = CharacterTable(chars)

In [7]:
questions = []
expected = []
seen = set()
print('Generating data...')

# Operands are ints in [0, 10**DIGITS - 1] and 'a+b' / 'b+a' count as the same
# question, so only this many distinct questions exist. Asking for more would
# make the sampling loop below spin forever, so fail loudly instead.
num_operand_values = 10 ** DIGITS
max_unique_questions = num_operand_values * (num_operand_values + 1) // 2
if TRAINING_SIZE > max_unique_questions:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} unique questions available with '
        'DIGITS={}'.format(TRAINING_SIZE, max_unique_questions, DIGITS))

digit_choices = list('0123456789')


def random_operand():
    """Return a random non-negative int written with 1 to DIGITS digits.

    The digit count is drawn first and each digit is then drawn
    independently, so leading zeros are possible (e.g. '007' -> 7).
    """
    num_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(digit_choices)
                       for _ in range(num_digits)))


while len(questions) < TRAINING_SIZE:
    a, b = random_operand(), random_operand()
    # Skip any addition question we have already seen. Sorting the pair makes
    # 'a+b' and 'b+a' share one key, so commuted duplicates are skipped too.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers can be of maximum size DIGITS + 1.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 50000


In [8]:
print('Vectorization...')
# One-hot tensors: x is (num_questions, MAXLEN, num_chars) for the queries and
# y is (num_questions, DIGITS + 1, num_chars) for the answers.
# Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and
# removed in NumPy 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [9]:
# Apply one shared random permutation to x and y so that question/answer
# pairs stay aligned; the later parts of x will almost all be larger digits,
# so the original order should not be kept.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the last 10% of the shuffled samples as validation data that is
# never trained on.
split_at = len(x) - len(x) // 10
x_train, y_train = x[:split_at], y[:split_at]
x_val, y_val = x[split_at:], y[split_at:]

# Report the shapes of both partitions.
for heading, inputs, targets in (('Training Data:', x_train, y_train),
                                 ('Validation Data:', x_val, y_val)):
    print(heading)
    print(inputs.shape)
    print(targets.shape)

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [10]:
# Recurrent layer class used for the encoder and the decoder. Try replacing
# it with layers.GRU or layers.SimpleRNN.
RNN = layers.LSTM
# Number of units in each recurrent layer.
HIDDEN_SIZE = 128
# Number of samples per batch passed to model.fit.
BATCH_SIZE = 128
# Number of stacked recurrent layers in the decoder.
LAYERS = 1

In [11]:
print('Build model...')
num_chars = len(chars)
answer_len = DIGITS + 1

model = Sequential()

# Encoder: read the whole query and summarise it as a single vector of size
# HIDDEN_SIZE. Note: for variable-length input sequences, use
# input_shape=(None, num_feature) instead.
encoder = RNN(HIDDEN_SIZE, input_shape=(MAXLEN, num_chars))
model.add(encoder)

# Feed that summary vector to the decoder once per output step. There are
# DIGITS + 1 steps because that is the longest possible answer, e.g., when
# DIGITS=3, max output is 999+999=1998.
model.add(layers.RepeatVector(answer_len))

# Decoder: one or more stacked recurrent layers. return_sequences=True makes
# each layer emit its output at every time step, i.e. a tensor of shape
# (num_samples, timesteps, output_dim), which the TimeDistributed wrapper
# below needs.
for _ in range(LAYERS):
    decoder_layer = RNN(HIDDEN_SIZE, return_sequences=True)
    model.add(decoder_layer)

# Per-step classifier: the same dense softmax layer is applied to every
# temporal slice to decide which character is emitted at that step.
char_classifier = layers.Dense(num_chars, activation='softmax')
model.add(layers.TimeDistributed(char_classifier))

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

W0722 12:27:13.544011  1588 deprecation_wrapper.py:119] From C:\Users\lilanqing\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.



Build model...


W0722 12:27:14.022901  1588 deprecation_wrapper.py:119] From C:\Users\lilanqing\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0722 12:27:14.120470  1588 deprecation_wrapper.py:119] From C:\Users\lilanqing\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0722 12:27:14.973714  1588 deprecation_wrapper.py:119] From C:\Users\lilanqing\Anaconda3\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0722 12:27:15.003582  1588 deprecation_wrapper.py:119] From C:\Users\lilanqing\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Train the model one epoch per iteration and, after each epoch, show
# predictions for a few random validation samples.
for iteration in range(1, 200):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a length-1 array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` is deprecated and has been removed
        # from newer TensorFlow/Keras releases. Taking the argmax of the
        # softmax output over the last axis yields the same class indices
        # and works on both old and new versions.
        preds = np.argmax(model.predict(rowx, verbose=0), axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Undo the input reversal so the question is printed as written.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 1


W0722 12:27:15.629316  1588 deprecation.py:323] From C:\Users\lilanqing\Anaconda3\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0722 12:27:17.328616  1588 deprecation_wrapper.py:119] From C:\Users\lilanqing\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 31+964  T 995  [91m☒[0m 102 
Q 25+836  T 861  [91m☒[0m 122 
Q 76+616  T 692  [91m☒[0m 102 
Q 490+289 T 779  [91m☒[0m 102 
Q 130+591 T 721  [91m☒[0m 102 
Q 51+213  T 264  [91m☒[0m 321 
Q 988+739 T 1727 [91m☒[0m 1113
Q 551+8   T 559  [91m☒[0m 52  
Q 942+9   T 951  [91m☒[0m 102 
Q 542+0   T 542  [91m☒[0m 42  

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 870+162 T 1032 [91m☒[0m 101 
Q 22+56   T 78   [91m☒[0m 22  
Q 873+1   T 874  [91m☒[0m 101 
Q 435+749 T 1184 [91m☒[0m 102 
Q 879+5   T 884  [91m☒[0m 101 
Q 437+18  T 455  [91m☒[0m 300 
Q 97+749  T 846  [91m☒[0m 100 
Q 0+191   T 191  [91m☒[0m 121 
Q 305+83  T 388  [91m☒[0m 300 
Q 304+348 T 652  [91m☒[0m 500 

--------------------------------------------------
Iteration 3
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 243+1   T 244  [91m☒[0m 334 
Q 

Q 381+20  T 401  [92m☑[0m 401 
Q 91+253  T 344  [92m☑[0m 344 
Q 235+62  T 297  [92m☑[0m 297 
Q 578+6   T 584  [92m☑[0m 584 
Q 422+869 T 1291 [91m☒[0m 1391
Q 1+115   T 116  [92m☑[0m 116 
Q 325+670 T 995  [92m☑[0m 995 
Q 19+936  T 955  [92m☑[0m 955 
Q 958+71  T 1029 [92m☑[0m 1029
Q 5+733   T 738  [92m☑[0m 738 

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 501+68  T 569  [91m☒[0m 579 
Q 270+22  T 292  [92m☑[0m 292 
Q 895+39  T 934  [92m☑[0m 934 
Q 4+575   T 579  [91m☒[0m 589 
Q 636+6   T 642  [92m☑[0m 642 
Q 29+687  T 716  [92m☑[0m 716 
Q 24+99   T 123  [92m☑[0m 123 
Q 11+339  T 350  [92m☑[0m 350 
Q 44+301  T 345  [92m☑[0m 345 
Q 688+861 T 1549 [92m☑[0m 1549

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 32+248  T 280  [92m☑[0m 280 
Q 410+91  T 501  [92m☑[0m 501 
Q 128+64  T 192  [92m☑[0

Q 2+158   T 160  [92m☑[0m 160 
Q 1+554   T 555  [92m☑[0m 555 
Q 719+3   T 722  [92m☑[0m 722 
Q 217+71  T 288  [92m☑[0m 288 
Q 2+487   T 489  [92m☑[0m 489 
Q 305+3   T 308  [92m☑[0m 308 
Q 129+29  T 158  [92m☑[0m 158 
Q 673+82  T 755  [92m☑[0m 755 
Q 749+224 T 973  [92m☑[0m 973 
Q 998+902 T 1900 [92m☑[0m 1900

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 763+44  T 807  [92m☑[0m 807 
Q 4+18    T 22   [92m☑[0m 22  
Q 879+5   T 884  [92m☑[0m 884 
Q 5+637   T 642  [92m☑[0m 642 
Q 28+497  T 525  [92m☑[0m 525 
Q 46+278  T 324  [92m☑[0m 324 
Q 854+943 T 1797 [92m☑[0m 1797
Q 720+35  T 755  [92m☑[0m 755 
Q 424+920 T 1344 [92m☑[0m 1344
Q 464+679 T 1143 [92m☑[0m 1143

--------------------------------------------------
Iteration 31
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 95+667  T 762  [92m☑[0m 762 
Q 906+5   T 911  [92m☑[0m 911 
Q 67+40   T 107  [92m☑[0

Q 771+98  T 869  [92m☑[0m 869 
Q 80+330  T 410  [92m☑[0m 410 
Q 80+320  T 400  [92m☑[0m 400 
Q 873+1   T 874  [92m☑[0m 874 
Q 91+16   T 107  [92m☑[0m 107 
Q 770+163 T 933  [92m☑[0m 933 
Q 26+937  T 963  [92m☑[0m 963 
Q 3+479   T 482  [92m☑[0m 482 
Q 818+2   T 820  [92m☑[0m 820 
Q 57+657  T 714  [92m☑[0m 714 

--------------------------------------------------
Iteration 44
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 45+21   T 66   [92m☑[0m 66  
Q 59+644  T 703  [92m☑[0m 703 
Q 56+492  T 548  [92m☑[0m 548 
Q 722+34  T 756  [92m☑[0m 756 
Q 377+99  T 476  [92m☑[0m 476 
Q 771+77  T 848  [92m☑[0m 848 
Q 16+75   T 91   [92m☑[0m 91  
Q 628+953 T 1581 [92m☑[0m 1581
Q 33+44   T 77   [92m☑[0m 77  
Q 660+321 T 981  [92m☑[0m 981 

--------------------------------------------------
Iteration 45
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 78+424  T 502  [92m☑[0m 502 
Q 770+8   T 778  [92m☑[0m 778 
Q 26+265  T 291  [92m☑[0

Q 634+82  T 716  [92m☑[0m 716 
Q 22+6    T 28   [92m☑[0m 28  
Q 9+779   T 788  [92m☑[0m 788 
Q 230+56  T 286  [92m☑[0m 286 
Q 8+222   T 230  [92m☑[0m 230 
Q 568+7   T 575  [92m☑[0m 575 
Q 94+452  T 546  [92m☑[0m 546 
Q 968+964 T 1932 [92m☑[0m 1932
Q 937+49  T 986  [92m☑[0m 986 
Q 23+735  T 758  [92m☑[0m 758 

--------------------------------------------------
Iteration 58
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 573+3   T 576  [92m☑[0m 576 
Q 946+264 T 1210 [92m☑[0m 1210
Q 314+453 T 767  [92m☑[0m 767 
Q 72+5    T 77   [92m☑[0m 77  
Q 571+91  T 662  [92m☑[0m 662 
Q 80+280  T 360  [92m☑[0m 360 
Q 66+588  T 654  [92m☑[0m 654 
Q 35+459  T 494  [92m☑[0m 494 
Q 63+836  T 899  [92m☑[0m 899 
Q 987+27  T 1014 [92m☑[0m 1014

--------------------------------------------------
Iteration 59
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 30+840  T 870  [92m☑[0m 870 
Q 29+349  T 378  [92m☑[0m 378 
Q 322+25  T 347  [92m☑[0

Q 7+553   T 560  [92m☑[0m 560 
Q 3+477   T 480  [92m☑[0m 480 
Q 29+711  T 740  [92m☑[0m 740 
Q 33+168  T 201  [92m☑[0m 201 
Q 24+948  T 972  [92m☑[0m 972 
Q 178+97  T 275  [92m☑[0m 275 
Q 80+333  T 413  [92m☑[0m 413 
Q 3+790   T 793  [92m☑[0m 793 
Q 698+74  T 772  [92m☑[0m 772 
Q 56+296  T 352  [92m☑[0m 352 

--------------------------------------------------
Iteration 72
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 439+937 T 1376 [92m☑[0m 1376
Q 60+95   T 155  [92m☑[0m 155 
Q 36+39   T 75   [92m☑[0m 75  
Q 768+87  T 855  [92m☑[0m 855 
Q 1+549   T 550  [92m☑[0m 550 
Q 79+771  T 850  [92m☑[0m 850 
Q 25+836  T 861  [92m☑[0m 861 
Q 33+70   T 103  [92m☑[0m 103 
Q 79+258  T 337  [92m☑[0m 337 
Q 757+739 T 1496 [91m☒[0m 1596

--------------------------------------------------
Iteration 73
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 489+815 T 1304 [92m☑[0m 1304
Q 76+954  T 1030 [92m☑[0m 1030
Q 96+971  T 1067 [92m☑[0

Q 518+6   T 524  [92m☑[0m 524 
Q 109+709 T 818  [91m☒[0m 828 
Q 778+126 T 904  [92m☑[0m 904 
Q 31+400  T 431  [92m☑[0m 431 
Q 853+492 T 1345 [92m☑[0m 1345
Q 4+553   T 557  [92m☑[0m 557 
Q 12+391  T 403  [92m☑[0m 403 
Q 693+64  T 757  [92m☑[0m 757 
Q 691+17  T 708  [92m☑[0m 708 
Q 68+542  T 610  [92m☑[0m 610 

--------------------------------------------------
Iteration 86
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 737+6   T 743  [92m☑[0m 743 
Q 672+475 T 1147 [92m☑[0m 1147
Q 74+514  T 588  [92m☑[0m 588 
Q 22+149  T 171  [92m☑[0m 171 
Q 483+26  T 509  [92m☑[0m 509 
Q 720+58  T 778  [92m☑[0m 778 
Q 19+213  T 232  [92m☑[0m 232 
Q 939+297 T 1236 [92m☑[0m 1236
Q 764+8   T 772  [92m☑[0m 772 
Q 2+120   T 122  [92m☑[0m 122 

--------------------------------------------------
Iteration 87
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 98+863  T 961  [92m☑[0m 961 
Q 15+291  T 306  [92m☑[0m 306 
Q 63+242  T 305  [92m☑[0

Q 731+0   T 731  [92m☑[0m 731 
Q 35+924  T 959  [92m☑[0m 959 
Q 810+81  T 891  [92m☑[0m 891 
Q 475+902 T 1377 [92m☑[0m 1377
Q 38+985  T 1023 [92m☑[0m 1023
Q 54+118  T 172  [92m☑[0m 172 
Q 696+59  T 755  [92m☑[0m 755 
Q 78+40   T 118  [92m☑[0m 118 
Q 90+791  T 881  [92m☑[0m 881 
Q 603+86  T 689  [92m☑[0m 689 

--------------------------------------------------
Iteration 100
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 176+729 T 905  [92m☑[0m 905 
Q 720+34  T 754  [92m☑[0m 754 
Q 659+901 T 1560 [92m☑[0m 1560
Q 170+388 T 558  [92m☑[0m 558 
Q 430+65  T 495  [92m☑[0m 495 
Q 32+671  T 703  [92m☑[0m 703 
Q 8+132   T 140  [92m☑[0m 140 
Q 631+91  T 722  [92m☑[0m 722 
Q 496+792 T 1288 [92m☑[0m 1288
Q 58+103  T 161  [92m☑[0m 161 

--------------------------------------------------
Iteration 101
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 287+573 T 860  [92m☑[0m 860 
Q 293+3   T 296  [92m☑[0m 296 
Q 760+530 T 1290 [92m☑

Q 495+207 T 702  [92m☑[0m 702 
Q 43+676  T 719  [92m☑[0m 719 
Q 121+62  T 183  [92m☑[0m 183 
Q 474+85  T 559  [92m☑[0m 559 
Q 260+713 T 973  [92m☑[0m 973 
Q 555+876 T 1431 [92m☑[0m 1431
Q 98+88   T 186  [92m☑[0m 186 
Q 634+748 T 1382 [92m☑[0m 1382
Q 786+10  T 796  [92m☑[0m 796 
Q 987+867 T 1854 [92m☑[0m 1854

--------------------------------------------------
Iteration 114
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 564+262 T 826  [92m☑[0m 826 
Q 911+167 T 1078 [92m☑[0m 1078
Q 29+788  T 817  [92m☑[0m 817 
Q 36+828  T 864  [92m☑[0m 864 
Q 35+984  T 1019 [92m☑[0m 1019
Q 5+102   T 107  [92m☑[0m 107 
Q 97+664  T 761  [92m☑[0m 761 
Q 7+275   T 282  [92m☑[0m 282 
Q 688+5   T 693  [92m☑[0m 693 
Q 125+53  T 178  [92m☑[0m 178 

--------------------------------------------------
Iteration 115
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 79+840  T 919  [92m☑[0m 919 
Q 36+981  T 1017 [92m☑[0m 1017
Q 182+276 T 458  [92m☑

Q 340+756 T 1096 [92m☑[0m 1096
Q 482+72  T 554  [92m☑[0m 554 
Q 140+262 T 402  [92m☑[0m 402 
Q 40+225  T 265  [92m☑[0m 265 
Q 667+88  T 755  [92m☑[0m 755 
Q 485+28  T 513  [92m☑[0m 513 
Q 851+413 T 1264 [92m☑[0m 1264
Q 124+2   T 126  [92m☑[0m 126 
Q 3+884   T 887  [92m☑[0m 887 
Q 695+68  T 763  [92m☑[0m 763 

--------------------------------------------------
Iteration 128
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 46+167  T 213  [92m☑[0m 213 
Q 655+265 T 920  [92m☑[0m 920 
Q 128+64  T 192  [92m☑[0m 192 
Q 20+61   T 81   [92m☑[0m 81  
Q 536+908 T 1444 [92m☑[0m 1444
Q 29+687  T 716  [92m☑[0m 716 
Q 902+293 T 1195 [92m☑[0m 1195
Q 9+27    T 36   [92m☑[0m 36  
Q 19+443  T 462  [92m☑[0m 462 
Q 95+307  T 402  [92m☑[0m 402 

--------------------------------------------------
Iteration 129
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 87+279  T 366  [92m☑[0m 366 
Q 122+27  T 149  [92m☑[0m 149 
Q 97+88   T 185  [92m☑

Q 53+231  T 284  [92m☑[0m 284 
Q 47+253  T 300  [92m☑[0m 300 
Q 792+95  T 887  [92m☑[0m 887 
Q 13+91   T 104  [92m☑[0m 104 
Q 878+344 T 1222 [92m☑[0m 1222
Q 49+60   T 109  [92m☑[0m 109 
Q 758+904 T 1662 [92m☑[0m 1662
Q 81+626  T 707  [92m☑[0m 707 
Q 658+18  T 676  [92m☑[0m 676 
Q 690+324 T 1014 [92m☑[0m 1014

--------------------------------------------------
Iteration 142
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 169+80  T 249  [92m☑[0m 249 
Q 82+240  T 322  [92m☑[0m 322 
Q 598+451 T 1049 [92m☑[0m 1049
Q 96+855  T 951  [92m☑[0m 951 
Q 92+417  T 509  [91m☒[0m 609 
Q 8+185   T 193  [92m☑[0m 193 
Q 57+299  T 356  [92m☑[0m 356 
Q 636+95  T 731  [92m☑[0m 731 
Q 44+372  T 416  [92m☑[0m 416 
Q 99+5    T 104  [92m☑[0m 104 

--------------------------------------------------
Iteration 143
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 232+4   T 236  [92m☑[0m 236 
Q 558+256 T 814  [92m☑[0m 814 
Q 16+987  T 1003 [92m☑

Q 50+651  T 701  [92m☑[0m 701 
Q 48+405  T 453  [92m☑[0m 453 
Q 270+83  T 353  [92m☑[0m 353 
Q 27+602  T 629  [92m☑[0m 629 
Q 66+588  T 654  [92m☑[0m 654 
Q 609+915 T 1524 [92m☑[0m 1524
Q 109+6   T 115  [92m☑[0m 115 
Q 304+348 T 652  [92m☑[0m 652 
Q 988+21  T 1009 [92m☑[0m 1009
Q 594+81  T 675  [92m☑[0m 675 

--------------------------------------------------
Iteration 156
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 28+9    T 37   [92m☑[0m 37  
Q 66+23   T 89   [92m☑[0m 89  
Q 1+554   T 555  [92m☑[0m 555 
Q 7+587   T 594  [92m☑[0m 594 
Q 379+63  T 442  [92m☑[0m 442 
Q 577+233 T 810  [92m☑[0m 810 
Q 57+982  T 1039 [92m☑[0m 1039
Q 954+39  T 993  [92m☑[0m 993 
Q 98+863  T 961  [92m☑[0m 961 
Q 40+47   T 87   [92m☑[0m 87  

--------------------------------------------------
Iteration 157
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 223+35  T 258  [92m☑[0m 258 
Q 268+216 T 484  [92m☑[0m 484 
Q 2+848   T 850  [92m☑

Q 822+11  T 833  [92m☑[0m 833 
Q 66+810  T 876  [92m☑[0m 876 
Q 40+69   T 109  [92m☑[0m 109 
Q 922+86  T 1008 [92m☑[0m 1008
Q 26+972  T 998  [92m☑[0m 998 
Q 40+766  T 806  [92m☑[0m 806 
Q 19+443  T 462  [92m☑[0m 462 
Q 106+59  T 165  [92m☑[0m 165 
Q 630+24  T 654  [92m☑[0m 654 
Q 57+906  T 963  [92m☑[0m 963 

--------------------------------------------------
Iteration 170
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 713+209 T 922  [92m☑[0m 922 
Q 388+150 T 538  [92m☑[0m 538 
Q 22+979  T 1001 [92m☑[0m 1001
Q 21+82   T 103  [92m☑[0m 103 
Q 909+56  T 965  [92m☑[0m 965 
Q 374+3   T 377  [92m☑[0m 377 
Q 179+428 T 607  [92m☑[0m 607 
Q 673+170 T 843  [92m☑[0m 843 
Q 7+434   T 441  [92m☑[0m 441 
Q 8+423   T 431  [92m☑[0m 431 

--------------------------------------------------
Iteration 171
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 74+59   T 133  [92m☑[0m 133 
Q 461+520 T 981  [92m☑[0m 981 
Q 388+45  T 433  [92m☑

Q 76+954  T 1030 [92m☑[0m 1030
Q 33+168  T 201  [92m☑[0m 201 
Q 978+287 T 1265 [92m☑[0m 1265
Q 578+2   T 580  [92m☑[0m 580 
Q 85+110  T 195  [92m☑[0m 195 
Q 905+78  T 983  [92m☑[0m 983 
Q 315+548 T 863  [92m☑[0m 863 
Q 1+85    T 86   [92m☑[0m 86  
Q 652+39  T 691  [92m☑[0m 691 
Q 43+461  T 504  [92m☑[0m 504 

--------------------------------------------------
Iteration 184
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 117+1   T 118  [92m☑[0m 118 
Q 467+526 T 993  [92m☑[0m 993 
Q 51+664  T 715  [92m☑[0m 715 
Q 736+864 T 1600 [92m☑[0m 1600
Q 940+95  T 1035 [92m☑[0m 1035
Q 4+852   T 856  [92m☑[0m 856 
Q 500+597 T 1097 [92m☑[0m 1097
Q 769+4   T 773  [92m☑[0m 773 
Q 17+677  T 694  [92m☑[0m 694 
Q 90+59   T 149  [92m☑[0m 149 

--------------------------------------------------
Iteration 185
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 3+734   T 737  [92m☑[0m 737 
Q 23+87   T 110  [92m☑[0m 110 
Q 362+81  T 443  [92m☑

Q 701+88  T 789  [92m☑[0m 789 
Q 431+637 T 1068 [92m☑[0m 1068
Q 76+160  T 236  [92m☑[0m 236 
Q 628+57  T 685  [92m☑[0m 685 
Q 363+968 T 1331 [92m☑[0m 1331
Q 53+576  T 629  [92m☑[0m 629 
Q 80+134  T 214  [92m☑[0m 214 
Q 605+81  T 686  [92m☑[0m 686 
Q 94+694  T 788  [92m☑[0m 788 
Q 485+756 T 1241 [92m☑[0m 1241

--------------------------------------------------
Iteration 198
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 911+95  T 1006 [92m☑[0m 1006
Q 72+59   T 131  [92m☑[0m 131 
Q 93+52   T 145  [92m☑[0m 145 
Q 208+9   T 217  [92m☑[0m 217 
Q 277+949 T 1226 [92m☑[0m 1226
Q 308+470 T 778  [92m☑[0m 778 
Q 630+24  T 654  [92m☑[0m 654 
Q 478+845 T 1323 [92m☑[0m 1323
Q 742+291 T 1033 [92m☑[0m 1033
Q 268+216 T 484  [92m☑[0m 484 

--------------------------------------------------
Iteration 199
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 4+980   T 984  [92m☑[0m 984 
Q 9+928   T 937  [92m☑[0m 937 
Q 415+936 T 1351 [92m☑

In [14]:
import os

# The trained model is stored in a 'saved_models' folder under the current
# working directory.
model_name = 'addition_rnn_trained_model.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_path = os.path.join(save_dir, model_name)

# Create the target folder on first use.
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

# Save model and weights
model.save(model_path)
print('Saved trained model at %s ' % (model_path,))

Saved trained model at D:\lilanqing\CODE\Python_learn\keras-study\saved_models\addition_rnn_trained_model.h5 
