In [26]:
# Logging level handed to model.fit during training (1 prints the log).
verbose = 1
# Value passed as the `implementation` argument of the recurrent layers.
impl = 2

from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range
import argparse
import time


class CharacterTable(object):
    """Two-way mapping between a character vocabulary and one-hot rows.

    Supports three operations:
    + one-hot encoding a string,
    + decoding a one-hot (or probability) matrix back to a string,
    + decoding a sequence of already-computed class indices to a string.
    """
    def __init__(self, chars):
        """Build the lookup tables for the vocabulary.

        # Arguments
            chars: Iterable of characters that may appear in the data.
                Duplicates are dropped and the remainder is sorted, so the
                index of a character is its rank in sorted order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = {idx: ch for idx, ch in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """Return the one-hot encoding of string C.

        # Arguments
            C: String to encode; every character must be in the vocabulary.
            num_rows: Number of rows in the returned matrix. Rows beyond
                len(C) stay all-zero, which keeps every sample the same
                height.

        # Returns
            Array of shape (num_rows, len(self.chars)) with a single 1 per
            encoded character.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            one_hot[row, self.char_indices[ch]] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn encoded rows back into a string.

        # Arguments
            x: When calc_argmax is True, an array whose last axis holds
                per-character scores; otherwise an iterable of class
                indices.
            calc_argmax: Whether to take the argmax over the last axis of x
                before looking the indices up.

        # Returns
            The decoded string, one character per index.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[idx] for idx in indices)

## Data

In [27]:
# Dataset and model hyper-parameters.
DIGITS = 3              # maximum number of digits in each operand
TRAINING_SIZE = 50000   # number of unique addition questions to generate
INVERT = True           # reverse each padded question string before encoding

# A question has the form 'int+int' (e.g., '345+678'): two operands of at most
# DIGITS characters each, plus one character for the '+' sign.
MAXLEN = 2 * DIGITS + 1

In [28]:
# Vocabulary: the ten digits, the plus sign, and a space used for padding.
chars = '0123456789' + '+ '
ctable = CharacterTable(chars)
ctable

<__main__.CharacterTable at 0x28989977048>

In [29]:
# Display the table's vocabulary: the unique characters in sorted order.
ctable.chars

[' ', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

## Generate Data

In [30]:
questions = []
expected = []
seen = set()


def _random_operand():
    """Return a random non-negative integer with 1 to DIGITS decimal digits.

    The digit count is drawn first, then each digit is drawn from '0'-'9'.
    Leading zeros are possible and vanish in the int conversion.
    """
    return int(''.join(np.random.choice(list('0123456789'))
                       for _ in range(np.random.randint(1, DIGITS + 1))))


# Questions are de-duplicated on the unordered pair (a, b), so with n possible
# operand values only n * (n + 1) / 2 distinct questions exist. Fail fast
# instead of letting the loop below spin forever on an unreachable target.
_num_operands = 10 ** DIGITS
_max_questions = _num_operands * (_num_operands + 1) // 2
if TRAINING_SIZE > _max_questions:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} unique questions available for '
        'DIGITS={}'.format(TRAINING_SIZE, _max_questions, DIGITS))

print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Skip any addition questions we've already seen.
    # Also skip any such that x+y == y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the question with spaces so that it is always MAXLEN characters.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers can be of maximum size DIGITS + 1.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if INVERT:
        # Reverse the query, e.g., '12+345 ' becomes ' 543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))
# Tip: print a few entries of `questions` and `expected` to inspect the data.

Generating data...
Total addition questions: 50000


In [31]:
print('Vectorization...')
# One-hot tensors of shape (num_samples, sequence_length, vocabulary_size).
# With chars = '0123456789+ ' the vocabulary size is 12: ten digits, '+', and
# the padding space.
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [32]:
# Report the dimensions of the encoded question and answer tensors.
print('{} {}'.format('x.shape: ', x.shape))
print('{} {}'.format('y.shape: ', y.shape))

x.shape:  (50000, 7, 12)
y.shape:  (50000, 4, 12)


In [33]:
# Apply one shared random permutation to x and y so each question stays
# aligned with its answer. Shuffling matters because the later parts of x
# will almost all be larger digits.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the last 10% of the shuffled samples as validation data that is
# never trained on.
split_at = len(x) - len(x) // 10
x_train, y_train = x[:split_at], y[:split_at]
x_val, y_val = x[split_at:], y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


## Model Building

In [34]:
# Recurrent layer class used for both encoder and decoder. Try replacing it
# with GRU or SimpleRNN.
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1
Dropout = 0.0

print('Build model...')
model = Sequential()

# Encoder: read the whole input sequence and emit one vector of HIDDEN_SIZE.
# Note: for variable-length input sequences, use
# input_shape=(None, num_feature) instead.
model.add(RNN(
    HIDDEN_SIZE,
    input_shape=(MAXLEN, len(chars)),
    recurrent_dropout=Dropout,
    implementation=impl,
))

# Feed the encoder's final output to the decoder at every time step. It is
# repeated DIGITS + 1 times because that is the maximum answer length, e.g.,
# when DIGITS=3 the largest answer is 999+999=1998.
model.add(layers.RepeatVector(DIGITS + 1))

# Decoder: one or more stacked recurrent layers.
for _ in range(LAYERS):
    # return_sequences=True makes the layer output every time step, shaped
    # (num_samples, timesteps, output_dim), rather than only the last one.
    # TimeDistributed below needs that time axis.
    model.add(RNN(
        HIDDEN_SIZE,
        return_sequences=True,
        recurrent_dropout=Dropout,
        implementation=impl,
    ))

# Apply the same dense layer to every time step of the decoder output, then
# softmax over the vocabulary to pick one character per output position.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_13 (LSTM)               (None, 128)               72192     
_________________________________________________________________
repeat_vector_7 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_14 (LSTM)               (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_5 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_7 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


## Train

In [35]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train for one epoch per iteration and, after each one, show the model's
# predictions for a few random validation samples.
iterations = 50
for iteration in range(1, iterations + 1):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val), verbose=verbose)
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Sequential.predict_classes was removed from Keras (TensorFlow 2.6).
        # Taking the argmax over the character axis of the predicted
        # probabilities yields the same class indices on old and new versions.
        preds = np.argmax(model.predict(rowx, verbose=0), axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so skip the argmax in decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were reversed when INVERT is set; undo that for display.
        print('Q', q[::-1] if INVERT else q)
        print('T', correct)
        if correct == guess:
            print('+', end=" ")
        else:
            print('-', end=" ")
        print(guess)
    print('---')
print()
print("Ending:", time.ctime())


--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 891+6  
T 897 
- 100 
Q 682+111
T 793 
- 109 
Q 51+11  
T 62  
+ 62  
Q 75+900 
T 975 
- 106 
Q 31+976 
T 1007
- 106 
Q 93+82  
T 175 
- 100 
Q 50+166 
T 216 
- 106 
Q 172+65 
T 237 
- 126 
Q 382+208
T 590 
- 109 
Q 485+20 
T 505 
- 109 
---

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 558+596
T 1154
- 1111
Q 647+59 
T 706 
- 701 
Q 47+85  
T 132 
- 100 
Q 548+88 
T 636 
- 901 
Q 731+27 
T 758 
- 271 
Q 19+36  
T 55  
- 11  
Q 264+47 
T 311 
- 371 
Q 747+90 
T 837 
- 101 
Q 23+53  
T 76  
- 330 
Q 89+229 
T 318 
- 300 
---

--------------------------------------------------
Iteration 3
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 489+625
T 1114
- 1007
Q 520+600
T 1120
- 1007
Q 183+864
T 1047
- 100 
Q 314+499
T 813 
- 702 
Q 65+3   
T 68  
- 16  
Q 902+944
T 1846
- 1777

Q 29+67  
T 96  
+ 96  
Q 863+727
T 1590
+ 1590
Q 98+885 
T 983 
+ 983 
Q 407+90 
T 497 
+ 497 
Q 814+74 
T 888 
+ 888 
Q 766+94 
T 860 
+ 860 
Q 567+479
T 1046
+ 1046
Q 60+351 
T 411 
+ 411 
Q 131+73 
T 204 
+ 204 
Q 22+72  
T 94  
+ 94  
---

--------------------------------------------------
Iteration 19
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 753+958
T 1711
+ 1711
Q 55+827 
T 882 
+ 882 
Q 53+517 
T 570 
+ 570 
Q 54+81  
T 135 
+ 135 
Q 51+807 
T 858 
+ 858 
Q 100+9  
T 109 
+ 109 
Q 529+574
T 1103
+ 1103
Q 114+99 
T 213 
+ 213 
Q 130+221
T 351 
+ 351 
Q 73+984 
T 1057
+ 1057
---

--------------------------------------------------
Iteration 20
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 319+101
T 420 
+ 420 
Q 800+995
T 1795
+ 1795
Q 182+931
T 1113
+ 1113
Q 27+838 
T 865 
+ 865 
Q 24+939 
T 963 
+ 963 
Q 632+9  
T 641 
+ 641 
Q 648+70 
T 718 
+ 718 
Q 906+68 
T 974 
+ 974 
Q 380+119
T 499 
- 409 
Q 76+26  
T 102 
+ 102 
---

-------------------

Q 325+90 
T 415 
+ 415 
Q 541+43 
T 584 
+ 584 
Q 80+144 
T 224 
+ 224 
Q 430+40 
T 470 
+ 470 
Q 26+17  
T 43  
+ 43  
Q 142+985
T 1127
+ 1127
Q 762+338
T 1100
+ 1100
Q 543+37 
T 580 
+ 580 
Q 45+278 
T 323 
+ 323 
Q 8+854  
T 862 
+ 862 
---

--------------------------------------------------
Iteration 36
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 60+95  
T 155 
+ 155 
Q 167+87 
T 254 
+ 254 
Q 702+75 
T 777 
+ 777 
Q 688+365
T 1053
+ 1053
Q 644+88 
T 732 
+ 732 
Q 548+815
T 1363
+ 1363
Q 698+86 
T 784 
+ 784 
Q 625+901
T 1526
+ 1526
Q 792+85 
T 877 
+ 877 
Q 213+222
T 435 
+ 435 
---

--------------------------------------------------
Iteration 37
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 704+52 
T 756 
+ 756 
Q 838+43 
T 881 
+ 881 
Q 63+507 
T 570 
+ 570 
Q 207+68 
T 275 
+ 275 
Q 28+60  
T 88  
+ 88  
Q 765+68 
T 833 
+ 833 
Q 802+637
T 1439
+ 1439
Q 331+75 
T 406 
+ 406 
Q 299+736
T 1035
+ 1035
Q 634+582
T 1216
+ 1216
---

-------------------