In [1]:
# -*- coding: utf-8 -*-
'''
# An implementation of sequence to sequence learning for performing addition

Input: "535+61"
Output: "596"
Padding is handled by using a repeated sentinel character (space)

Input may optionally be reversed, shown to increase performance in many tasks in:
"Learning to Execute"
http://arxiv.org/abs/1410.4615
and
"Sequence to Sequence Learning with Neural Networks"
http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf
In theory, reversing the input introduces shorter-term dependencies between source and target.

Two digits reversed:
+ One layer LSTM (128 HN), 5k training examples = 99% train/test accuracy in 55 epochs

Three digits reversed:
+ One layer LSTM (128 HN), 50k training examples = 99% train/test accuracy in 100 epochs

Four digits reversed:
+ One layer LSTM (128 HN), 400k training examples = 99% train/test accuracy in 20 epochs

Five digits reversed:
+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
'''  # noqa

from __future__ import print_function

import os

import numpy as np
from keras import layers
from keras.models import Sequential
from six.moves import range

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class CharacterTable(object):
    """Two-way mapping between a fixed character set and one-hot rows.

    Supported operations:
    + Encode a string as a one-hot matrix (one row per character)
    + Decode one-hot rows or integer indices back into a string
    + Decode rows of probabilities by picking the most likely character
    """
    def __init__(self, chars):
        """Build the lookup tables for the given alphabet.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the remainder is sorted; a character's position
                in that sorted list is its column index.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """Return the one-hot encoding of string C.

        # Arguments
            C: string, to be encoded.
            num_rows: Number of rows in the returned matrix. Rows beyond
                len(C) are left as all zeros, so every encoded sample can
                share the same shape.

        # Returns
            Array of shape (num_rows, len(self.chars)) with a single 1 in
            each of the first len(C) rows.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            column = self.char_indices[ch]
            onehot[row, column] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn a sequence of rows or indices back into a string.

        # Arguments
            x: A vector or a 2D array of probabilities or one-hot
                representations; or a vector of character indices (used
                with `calc_argmax=False`).
            calc_argmax: Whether to reduce `x` along its last axis with
                argmax before the lookup, defaults to `True`.

        # Returns
            The decoded string, one character per index.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[idx] for idx in indices)

In [3]:
class colors:
    """ANSI escape sequences for colouring terminal output."""
    # Resets the terminal back to its default attributes.
    close = '\033[0m'
    # Switches the foreground colour (ANSI code 91).
    fail = '\033[91m'
    # Switches the foreground colour (ANSI code 92).
    ok = '\033[92m'

In [4]:
# Dataset and task configuration.
TRAINING_SIZE = 50000  # number of addition questions to generate
DIGITS = 3             # maximum number of digits per operand
REVERSE = True         # whether question strings are reversed

# The longest question has the form 'int+int' (e.g., '345+678'): two operands
# of at most DIGITS characters each, plus one character for the '+' sign.
MAXLEN = 2 * DIGITS + 1

In [5]:
# Vocabulary: the ten digits, the plus sign, and a space that serves as the
# padding character.
chars = '0123456789' + '+' + ' '
ctable = CharacterTable(chars)

In [7]:
def _random_operand():
    """Return a random integer written with 1 to DIGITS decimal digits.

    The digit count is drawn first and then each digit is drawn
    independently, so leading zeros are possible (e.g., '007' becomes 7).
    """
    num_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789'))
                       for _ in range(num_digits)))


# Operands range over 0 .. 10**DIGITS - 1 and (a, b) counts as the same
# question as (b, a), so only this many distinct questions exist. Requesting
# more would make the sampling loop below run forever, so fail fast instead.
num_operands = 10 ** DIGITS
max_questions = num_operands * (num_operands + 1) // 2
if TRAINING_SIZE > max_questions:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} distinct questions available for '
        'DIGITS={}'.format(TRAINING_SIZE, max_questions, DIGITS))

questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Skip any addition questions we've already seen.
    # Also skip any such that x+Y == Y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the question with trailing spaces so it is always MAXLEN long.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers can be of maximum size DIGITS + 1; pad them the same way.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reverse the padded query, e.g., '12+345 ' becomes ' 543+21'. (Note
        # that the padding space ends up at the front.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 50000


In [8]:
print('Vectorization...')
# One-hot tensors: x holds the questions, y the expected answers. Both have
# one row per character position and one column per vocabulary character.
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated and later removed from NumPy; both name the same dtype.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [9]:
# Shuffle x and y with one shared permutation so each question stays paired
# with its answer; without this the later parts of x will almost all be
# larger digits.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the last 10% of the shuffled samples as validation data that is
# never trained on.
split_at = len(x) - len(x) // 10
x_train, y_train = x[:split_at], y[:split_at]
x_val, y_val = x[split_at:], y[split_at:]

# Report the shapes of both partitions.
for title, inputs, targets in (('Training Data:', x_train, y_train),
                               ('Validation Data:', x_val, y_val)):
    print(title)
    print(inputs.shape)
    print(targets.shape)

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [10]:
# Network hyperparameters.
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1
# Recurrent layer class to build the network from. Try replacing it with
# layers.GRU or layers.SimpleRNN.
RNN = layers.LSTM

In [11]:
print('Build model...')
num_chars = len(chars)
answer_len = DIGITS + 1

model = Sequential()
# Encoder: read the whole question and summarise it as one vector of size
# HIDDEN_SIZE. Note: for input sequences of variable length, use
# input_shape=(None, num_feature) instead.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, num_chars)))
# Feed that summary vector to the decoder at every output step. It is
# repeated 'DIGITS + 1' times because that is the longest possible answer,
# e.g., when DIGITS=3, max output is 999+999=1998.
model.add(layers.RepeatVector(answer_len))
# Decoder: one or more stacked recurrent layers.
for _ in range(LAYERS):
    # return_sequences=True makes the layer emit its output at every step,
    # shaped (num_samples, timesteps, output_dim), rather than only the last
    # one. The TimeDistributed wrapper below needs that timestep axis.
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply the same dense softmax layer to every temporal slice, choosing one
# character per step of the output sequence.
char_classifier = layers.Dense(num_chars, activation='softmax')
model.add(layers.TimeDistributed(char_classifier))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Train the model one epoch at a time and, after each epoch, show a few
# predictions against the validation dataset. range(1, 200) gives 199 rounds.
for iteration in range(1, 200):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for _ in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Use predict() rather than predict_classes(): the latter was
        # deprecated and later removed from Keras. predict() returns
        # per-character probabilities, and ctable.decode takes the argmax
        # itself, which yields the same class indices.
        preds = model.predict(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0])
        if correct == guess:
            mark = colors.ok + '☑' + colors.close
        else:
            mark = colors.fail + '☒' + colors.close
        # Questions were stored reversed when REVERSE is set; undo that for
        # display.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        print(mark, end=' ')
        print(guess)


--------------------------------------------------
Iteration 1
Instructions for updating:
Use tf.cast instead.
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 64+96   T 160  [91m☒[0m 111 
Q 649+444 T 1093 [91m☒[0m 100 
Q 821+76  T 897  [91m☒[0m 150 
Q 43+767  T 810  [91m☒[0m 647 
Q 444+938 T 1382 [91m☒[0m 115 
Q 77+237  T 314  [91m☒[0m 143 
Q 233+65  T 298  [91m☒[0m 743 
Q 4+771   T 775  [91m☒[0m 111 
Q 393+132 T 525  [91m☒[0m 133 
Q 707+162 T 869  [91m☒[0m 105 

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 13+125  T 138  [91m☒[0m 23  
Q 49+812  T 861  [91m☒[0m 899 
Q 905+5   T 910  [91m☒[0m 966 
Q 490+896 T 1386 [91m☒[0m 1464
Q 167+41  T 208  [91m☒[0m 778 
Q 533+43  T 576  [91m☒[0m 467 
Q 66+313  T 379  [91m☒[0m 467 
Q 41+188  T 229  [91m☒[0m 222 
Q 61+838  T 899  [92m☑[0m 899 
Q 955+403 T 1358 [91m☒[0m 1464

---------------------------------------------

Q 87+711  T 798  [91m☒[0m 898 
Q 807+683 T 1490 [92m☑[0m 1490
Q 63+396  T 459  [91m☒[0m 469 
Q 11+559  T 570  [92m☑[0m 570 
Q 34+34   T 68   [92m☑[0m 68  
Q 29+54   T 83   [92m☑[0m 83  
Q 770+47  T 817  [92m☑[0m 817 
Q 11+288  T 299  [91m☒[0m 399 
Q 81+736  T 817  [92m☑[0m 817 
Q 979+149 T 1128 [92m☑[0m 1128

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 586+91  T 677  [92m☑[0m 677 
Q 272+51  T 323  [92m☑[0m 323 
Q 518+39  T 557  [92m☑[0m 557 
Q 962+96  T 1058 [92m☑[0m 1058
Q 59+187  T 246  [92m☑[0m 246 
Q 93+387  T 480  [92m☑[0m 480 
Q 987+311 T 1298 [92m☑[0m 1298
Q 776+93  T 869  [92m☑[0m 869 
Q 691+25  T 716  [92m☑[0m 716 
Q 603+15  T 618  [92m☑[0m 618 

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 68+236  T 304  [92m☑[0m 304 
Q 796+670 T 1466 [92m☑[0m 1466
Q 17+725  T 742  [92m☑[0

Q 34+34   T 68   [92m☑[0m 68  
Q 705+51  T 756  [92m☑[0m 756 
Q 430+615 T 1045 [92m☑[0m 1045
Q 18+88   T 106  [92m☑[0m 106 
Q 664+79  T 743  [92m☑[0m 743 
Q 459+62  T 521  [92m☑[0m 521 
Q 255+700 T 955  [92m☑[0m 955 
Q 65+827  T 892  [92m☑[0m 892 
Q 78+938  T 1016 [92m☑[0m 1016
Q 19+765  T 784  [92m☑[0m 784 

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 406+701 T 1107 [92m☑[0m 1107
Q 27+59   T 86   [92m☑[0m 86  
Q 77+787  T 864  [92m☑[0m 864 
Q 45+920  T 965  [92m☑[0m 965 
Q 19+60   T 79   [92m☑[0m 79  
Q 22+808  T 830  [92m☑[0m 830 
Q 592+772 T 1364 [92m☑[0m 1364
Q 69+60   T 129  [92m☑[0m 129 
Q 981+45  T 1026 [92m☑[0m 1026
Q 344+45  T 389  [92m☑[0m 389 

--------------------------------------------------
Iteration 31
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 558+0   T 558  [92m☑[0m 558 
Q 72+356  T 428  [92m☑[0m 428 
Q 75+604  T 679  [92m☑[0

Q 80+369  T 449  [92m☑[0m 449 
Q 200+83  T 283  [92m☑[0m 283 
Q 457+11  T 468  [92m☑[0m 468 
Q 282+33  T 315  [92m☑[0m 315 
Q 318+555 T 873  [92m☑[0m 873 
Q 983+82  T 1065 [92m☑[0m 1065
Q 518+398 T 916  [92m☑[0m 916 
Q 216+338 T 554  [92m☑[0m 554 
Q 923+571 T 1494 [92m☑[0m 1494
Q 787+22  T 809  [92m☑[0m 809 

--------------------------------------------------
Iteration 44
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 396+559 T 955  [92m☑[0m 955 
Q 866+883 T 1749 [92m☑[0m 1749
Q 94+170  T 264  [92m☑[0m 264 
Q 13+432  T 445  [92m☑[0m 445 
Q 618+62  T 680  [92m☑[0m 680 
Q 165+73  T 238  [92m☑[0m 238 
Q 889+835 T 1724 [92m☑[0m 1724
Q 856+79  T 935  [92m☑[0m 935 
Q 717+76  T 793  [92m☑[0m 793 
Q 857+20  T 877  [92m☑[0m 877 

--------------------------------------------------
Iteration 45
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 471+0   T 471  [92m☑[0m 471 
Q 7+781   T 788  [92m☑[0m 788 
Q 77+916  T 993  [92m☑[0

Q 90+355  T 445  [92m☑[0m 445 
Q 2+855   T 857  [92m☑[0m 857 
Q 95+431  T 526  [92m☑[0m 526 
Q 79+795  T 874  [92m☑[0m 874 
Q 919+108 T 1027 [92m☑[0m 1027
Q 19+67   T 86   [92m☑[0m 86  
Q 758+322 T 1080 [92m☑[0m 1080
Q 545+9   T 554  [92m☑[0m 554 
Q 997+66  T 1063 [92m☑[0m 1063
Q 184+274 T 458  [92m☑[0m 458 

--------------------------------------------------
Iteration 58
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 404+740 T 1144 [92m☑[0m 1144
Q 26+89   T 115  [92m☑[0m 115 
Q 408+902 T 1310 [92m☑[0m 1310
Q 481+71  T 552  [92m☑[0m 552 
Q 500+374 T 874  [92m☑[0m 874 
Q 589+52  T 641  [92m☑[0m 641 
Q 908+42  T 950  [92m☑[0m 950 
Q 95+132  T 227  [92m☑[0m 227 
Q 34+864  T 898  [92m☑[0m 898 
Q 841+30  T 871  [92m☑[0m 871 

--------------------------------------------------
Iteration 59
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 725+859 T 1584 [92m☑[0m 1584
Q 21+670  T 691  [92m☑[0m 691 
Q 429+227 T 656  [92m☑[0

Q 76+415  T 491  [92m☑[0m 491 
Q 1+410   T 411  [92m☑[0m 411 
Q 85+789  T 874  [92m☑[0m 874 
Q 69+643  T 712  [92m☑[0m 712 
Q 82+273  T 355  [92m☑[0m 355 
Q 21+642  T 663  [92m☑[0m 663 
Q 262+7   T 269  [92m☑[0m 269 
Q 79+833  T 912  [92m☑[0m 912 
Q 50+382  T 432  [92m☑[0m 432 
Q 91+59   T 150  [92m☑[0m 150 

--------------------------------------------------
Iteration 72
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 61+838  T 899  [92m☑[0m 899 
Q 226+447 T 673  [92m☑[0m 673 
Q 855+80  T 935  [92m☑[0m 935 
Q 164+461 T 625  [92m☑[0m 625 
Q 31+635  T 666  [92m☑[0m 666 
Q 873+32  T 905  [92m☑[0m 905 
Q 363+53  T 416  [92m☑[0m 416 
Q 8+603   T 611  [92m☑[0m 611 
Q 535+7   T 542  [92m☑[0m 542 
Q 7+5     T 12   [92m☑[0m 12  

--------------------------------------------------
Iteration 73
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 1+893   T 894  [92m☑[0m 894 
Q 797+2   T 799  [92m☑[0m 799 
Q 958+5   T 963  [92m☑[0

Q 385+159 T 544  [92m☑[0m 544 
Q 344+966 T 1310 [92m☑[0m 1310
Q 379+650 T 1029 [92m☑[0m 1029
Q 2+517   T 519  [92m☑[0m 519 
Q 87+27   T 114  [92m☑[0m 114 
Q 470+5   T 475  [92m☑[0m 475 
Q 36+32   T 68   [92m☑[0m 68  
Q 370+764 T 1134 [92m☑[0m 1134
Q 500+374 T 874  [92m☑[0m 874 
Q 88+68   T 156  [92m☑[0m 156 

--------------------------------------------------
Iteration 86
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 816+6   T 822  [92m☑[0m 822 
Q 6+565   T 571  [92m☑[0m 571 
Q 94+991  T 1085 [92m☑[0m 1085
Q 615+44  T 659  [92m☑[0m 659 
Q 476+78  T 554  [92m☑[0m 554 
Q 619+936 T 1555 [92m☑[0m 1555
Q 364+776 T 1140 [92m☑[0m 1140
Q 919+49  T 968  [92m☑[0m 968 
Q 342+734 T 1076 [92m☑[0m 1076
Q 361+59  T 420  [92m☑[0m 420 

--------------------------------------------------
Iteration 87
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 915+19  T 934  [92m☑[0m 934 
Q 24+523  T 547  [92m☑[0m 547 
Q 170+864 T 1034 [92m☑[0

Q 35+403  T 438  [92m☑[0m 438 
Q 5+496   T 501  [92m☑[0m 501 
Q 46+346  T 392  [92m☑[0m 392 
Q 465+439 T 904  [92m☑[0m 904 
Q 91+60   T 151  [92m☑[0m 151 
Q 39+381  T 420  [92m☑[0m 420 
Q 955+34  T 989  [92m☑[0m 989 
Q 57+103  T 160  [92m☑[0m 160 
Q 4+610   T 614  [92m☑[0m 614 
Q 66+682  T 748  [92m☑[0m 748 

--------------------------------------------------
Iteration 100
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 892+398 T 1290 [92m☑[0m 1290
Q 132+996 T 1128 [92m☑[0m 1128
Q 143+40  T 183  [92m☑[0m 183 
Q 1+789   T 790  [92m☑[0m 790 
Q 573+294 T 867  [92m☑[0m 867 
Q 903+651 T 1554 [92m☑[0m 1554
Q 981+423 T 1404 [92m☑[0m 1404
Q 73+321  T 394  [92m☑[0m 394 
Q 81+464  T 545  [92m☑[0m 545 
Q 206+11  T 217  [92m☑[0m 217 

--------------------------------------------------
Iteration 101
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 6+565   T 571  [92m☑[0m 571 
Q 4+771   T 775  [92m☑[0m 775 
Q 779+71  T 850  [92m☑

Q 842+9   T 851  [92m☑[0m 851 
Q 207+4   T 211  [92m☑[0m 211 
Q 839+52  T 891  [92m☑[0m 891 
Q 756+93  T 849  [92m☑[0m 849 
Q 58+300  T 358  [92m☑[0m 358 
Q 26+648  T 674  [92m☑[0m 674 
Q 56+348  T 404  [92m☑[0m 404 
Q 35+38   T 73   [92m☑[0m 73  
Q 365+11  T 376  [92m☑[0m 376 
Q 623+223 T 846  [92m☑[0m 846 

--------------------------------------------------
Iteration 114
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 344+384 T 728  [92m☑[0m 728 
Q 63+52   T 115  [92m☑[0m 115 
Q 968+647 T 1615 [92m☑[0m 1615
Q 1+789   T 790  [92m☑[0m 790 
Q 25+123  T 148  [92m☑[0m 148 
Q 853+185 T 1038 [92m☑[0m 1038
Q 29+908  T 937  [92m☑[0m 937 
Q 225+662 T 887  [92m☑[0m 887 
Q 848+453 T 1301 [92m☑[0m 1301
Q 974+49  T 1023 [92m☑[0m 1023

--------------------------------------------------
Iteration 115
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 60+80   T 140  [92m☑[0m 140 
Q 77+745  T 822  [92m☑[0m 822 
Q 218+978 T 1196 [92m☑

Q 221+567 T 788  [92m☑[0m 788 
Q 163+31  T 194  [92m☑[0m 194 
Q 52+633  T 685  [92m☑[0m 685 
Q 621+389 T 1010 [92m☑[0m 1010
Q 532+11  T 543  [92m☑[0m 543 
Q 602+799 T 1401 [92m☑[0m 1401
Q 91+64   T 155  [92m☑[0m 155 
Q 109+258 T 367  [92m☑[0m 367 
Q 2+228   T 230  [92m☑[0m 230 
Q 752+919 T 1671 [92m☑[0m 1671

--------------------------------------------------
Iteration 128
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 928+729 T 1657 [92m☑[0m 1657
Q 0+515   T 515  [92m☑[0m 515 
Q 367+0   T 367  [92m☑[0m 367 
Q 23+960  T 983  [92m☑[0m 983 
Q 799+9   T 808  [92m☑[0m 808 
Q 785+963 T 1748 [92m☑[0m 1748
Q 17+21   T 38   [92m☑[0m 38  
Q 158+156 T 314  [92m☑[0m 314 
Q 68+73   T 141  [92m☑[0m 141 
Q 941+841 T 1782 [92m☑[0m 1782

--------------------------------------------------
Iteration 129
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 106+88  T 194  [92m☑[0m 194 
Q 940+8   T 948  [92m☑[0m 948 
Q 44+29   T 73   [92m☑

Q 605+85  T 690  [92m☑[0m 690 
Q 117+744 T 861  [92m☑[0m 861 
Q 824+108 T 932  [92m☑[0m 932 
Q 852+46  T 898  [92m☑[0m 898 
Q 55+92   T 147  [92m☑[0m 147 
Q 71+714  T 785  [92m☑[0m 785 
Q 16+514  T 530  [92m☑[0m 530 
Q 1+221   T 222  [92m☑[0m 222 
Q 80+369  T 449  [92m☑[0m 449 
Q 910+873 T 1783 [92m☑[0m 1783

--------------------------------------------------
Iteration 142
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 544+83  T 627  [92m☑[0m 627 
Q 974+51  T 1025 [92m☑[0m 1025
Q 935+59  T 994  [92m☑[0m 994 
Q 9+204   T 213  [92m☑[0m 213 
Q 646+53  T 699  [92m☑[0m 699 
Q 907+64  T 971  [92m☑[0m 971 
Q 767+32  T 799  [92m☑[0m 799 
Q 79+795  T 874  [92m☑[0m 874 
Q 336+62  T 398  [92m☑[0m 398 
Q 928+55  T 983  [92m☑[0m 983 

--------------------------------------------------
Iteration 143
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 786+57  T 843  [92m☑[0m 843 
Q 66+28   T 94   [92m☑[0m 94  
Q 581+97  T 678  [92m☑

Q 31+155  T 186  [92m☑[0m 186 
Q 123+253 T 376  [92m☑[0m 376 
Q 509+202 T 711  [92m☑[0m 711 
Q 568+79  T 647  [92m☑[0m 647 
Q 646+64  T 710  [92m☑[0m 710 
Q 291+43  T 334  [92m☑[0m 334 
Q 978+676 T 1654 [92m☑[0m 1654
Q 17+711  T 728  [92m☑[0m 728 
Q 12+214  T 226  [92m☑[0m 226 
Q 860+293 T 1153 [92m☑[0m 1153

--------------------------------------------------
Iteration 156
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 181+7   T 188  [92m☑[0m 188 
Q 79+32   T 111  [92m☑[0m 111 
Q 486+57  T 543  [91m☒[0m 552 
Q 300+99  T 399  [92m☑[0m 399 
Q 67+289  T 356  [92m☑[0m 356 
Q 126+498 T 624  [92m☑[0m 624 
Q 460+926 T 1386 [92m☑[0m 1386
Q 476+867 T 1343 [92m☑[0m 1343
Q 88+119  T 207  [91m☒[0m 217 
Q 946+114 T 1060 [92m☑[0m 1060

--------------------------------------------------
Iteration 157
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 0+515   T 515  [92m☑[0m 515 
Q 3+855   T 858  [92m☑[0m 858 
Q 29+144  T 173  [92m☑

Q 632+58  T 690  [92m☑[0m 690 
Q 93+808  T 901  [92m☑[0m 901 
Q 471+139 T 610  [92m☑[0m 610 
Q 1+419   T 420  [92m☑[0m 420 
Q 765+273 T 1038 [92m☑[0m 1038
Q 23+78   T 101  [92m☑[0m 101 
Q 482+106 T 588  [92m☑[0m 588 
Q 9+624   T 633  [92m☑[0m 633 
Q 650+405 T 1055 [92m☑[0m 1055
Q 772+352 T 1124 [92m☑[0m 1124

--------------------------------------------------
Iteration 170
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 231+37  T 268  [92m☑[0m 268 
Q 580+401 T 981  [92m☑[0m 981 
Q 672+506 T 1178 [92m☑[0m 1178
Q 32+62   T 94   [92m☑[0m 94  
Q 375+280 T 655  [92m☑[0m 655 
Q 93+71   T 164  [92m☑[0m 164 
Q 21+670  T 691  [92m☑[0m 691 
Q 96+655  T 751  [92m☑[0m 751 
Q 447+86  T 533  [92m☑[0m 533 
Q 685+39  T 724  [92m☑[0m 724 

--------------------------------------------------
Iteration 171
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 292+154 T 446  [92m☑[0m 446 
Q 98+771  T 869  [92m☑[0m 869 
Q 362+791 T 1153 [92m☑

Q 1+651   T 652  [92m☑[0m 652 
Q 8+782   T 790  [92m☑[0m 790 
Q 958+681 T 1639 [92m☑[0m 1639
Q 526+316 T 842  [92m☑[0m 842 
Q 595+3   T 598  [92m☑[0m 598 
Q 79+498  T 577  [92m☑[0m 577 
Q 946+548 T 1494 [92m☑[0m 1494
Q 4+256   T 260  [92m☑[0m 260 
Q 77+852  T 929  [92m☑[0m 929 
Q 44+318  T 362  [92m☑[0m 362 

--------------------------------------------------
Iteration 184
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 85+95   T 180  [92m☑[0m 180 
Q 84+570  T 654  [92m☑[0m 654 
Q 528+91  T 619  [92m☑[0m 619 
Q 374+688 T 1062 [92m☑[0m 1062
Q 67+272  T 339  [92m☑[0m 339 
Q 42+68   T 110  [92m☑[0m 110 
Q 497+90  T 587  [92m☑[0m 587 
Q 920+655 T 1575 [92m☑[0m 1575
Q 60+425  T 485  [92m☑[0m 485 
Q 55+830  T 885  [92m☑[0m 885 

--------------------------------------------------
Iteration 185
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 272+4   T 276  [92m☑[0m 276 
Q 21+153  T 174  [92m☑[0m 174 
Q 4+220   T 224  [92m☑

Q 825+237 T 1062 [92m☑[0m 1062
Q 789+253 T 1042 [92m☑[0m 1042
Q 589+7   T 596  [92m☑[0m 596 
Q 18+522  T 540  [92m☑[0m 540 
Q 688+549 T 1237 [92m☑[0m 1237
Q 94+57   T 151  [92m☑[0m 151 
Q 4+729   T 733  [92m☑[0m 733 
Q 635+2   T 637  [92m☑[0m 637 
Q 960+89  T 1049 [92m☑[0m 1049
Q 70+858  T 928  [92m☑[0m 928 

--------------------------------------------------
Iteration 198
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 561+935 T 1496 [92m☑[0m 1496
Q 313+860 T 1173 [92m☑[0m 1173
Q 78+732  T 810  [92m☑[0m 810 
Q 35+913  T 948  [92m☑[0m 948 
Q 956+42  T 998  [92m☑[0m 998 
Q 621+358 T 979  [92m☑[0m 979 
Q 852+359 T 1211 [92m☑[0m 1211
Q 254+26  T 280  [92m☑[0m 280 
Q 59+897  T 956  [92m☑[0m 956 
Q 794+805 T 1599 [92m☑[0m 1599

--------------------------------------------------
Iteration 199
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 37+977  T 1014 [92m☑[0m 1014
Q 67+289  T 356  [92m☑[0m 356 
Q 201+44  T 245  [92m☑

In [14]:
# Persist the trained model under ./saved_models relative to the current
# working directory. `os` is imported in the notebook's first cell together
# with the other imports.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'addition_rnn_trained_model.h5'

# Create the target directory on first use, then save the model to it.
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

Saved trained model at D:\lilanqing\CODE\Python_learn\keras-study\saved_models\addition_rnn_trained_model.h5 
