In [2]:
import numpy as np
from keras.models import Sequential
from keras.engine.training import _slice_arrays as slice_X
from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
from six.moves import range

In [3]:

class CharTable(object):
    """One-hot codec between strings and 2-D arrays over a fixed alphabet.

    The alphabet is the sorted set of characters in ``chars``; a character's
    column in the encoded array is its position in that sorted list.
    """

    def __init__(self, chars, maxlen):
        """Build the character <-> index lookup tables.

        chars:  iterable of characters making up the alphabet (duplicates ignored).
        maxlen: default number of rows produced by ``encode``.
        """
        alphabet = sorted(set(chars))
        self.chars = alphabet
        self.char_indices = {ch: idx for idx, ch in enumerate(alphabet)}
        self.indices_char = dict(enumerate(alphabet))
        self.maxlen = maxlen

    def encode(self, C, maxlen=None):
        """Return a ``(maxlen, len(self.chars))`` float array one-hot encoding ``C``.

        Row ``i`` has a single 1 in the column of ``C[i]``; rows beyond
        ``len(C)`` stay all-zero. A falsy ``maxlen`` falls back to ``self.maxlen``.
        """
        n_rows = maxlen or self.maxlen
        onehot = np.zeros((n_rows, len(self.chars)))
        for pos, ch in enumerate(C):
            onehot[pos, self.char_indices[ch]] = 1
        return onehot

    def decode(self, X, calc_argmax=True):
        """Map ``X`` back to a string.

        With ``calc_argmax`` true, ``X`` is reduced with ``argmax`` over its last
        axis first; otherwise ``X`` is iterated directly as a sequence of indices.
        """
        indices = X.argmax(axis=-1) if calc_argmax else X
        return ''.join(self.indices_char[idx] for idx in indices)

In [4]:
class Colors:
    """ANSI terminal escape sequences for colouring printed text."""
    # Reset sequence: restores the terminal's default attributes.
    close = '\033[0m'
    # Bright red foreground.
    fail = '\033[91m'
    # Bright green foreground.
    ok = '\033[92m'

In [5]:

# --- Problem definition ---
TRAIN_SIZE = 50000  # number of distinct addition questions to generate
DIGITS = 3          # maximum number of digits per operand
INVERT = True       # reverse each question string before it is encoded

# --- Model hyper-parameters ---
RNN = recurrent.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

# Longest question is 'ddd+ddd=': two operands of DIGITS characters plus '+' and '='.
MAXLEN = 2 * DIGITS + 2

# Alphabet shared by questions and answers, and the one-hot codec built on it.
chars = '0123456789+='
ctable = CharTable(chars, MAXLEN)

# Accumulators filled by the data-generation cell below.
questions, expected, seen = [], [], set()

In [13]:

%%time

# Generate data
while len(questions) < TRAIN_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS+1))))
    a, b = f(), f()
    
    key = tuple(sorted((a,b)))
    if key in seen:
        continue
        
    seen.add(key)
    q = '{}+{}='.format(a, b)
    query = q + ''*(MAXLEN - len(q))
    
    ans = str(a+b)
    ans += ''*(DIGITS + 1 - len(ans))
    
    if INVERT:
        query = query[::-1]
        
    questions.append(query)
    expected.append(ans)
    
print(questions[:10])
print(expected[:10])
print('Total addition questions:', len(questions))

['=3+7', '=14+171', '=464+2', '=685+9', '=75+455', '=73+0', '=058+3', '=2+65', '=2+328', '=834+546']
['10', '212', '466', '595', '611', '37', '853', '58', '825', '1083']
Total addition questions: 50000
CPU times: user 5.14 s, sys: 42.1 ms, total: 5.18 s
Wall time: 5.26 s


In [19]:

print('Vectorization...')
# One-hot encode every question and answer. The builtin `bool` is used as the
# dtype because the `np.bool` alias is deprecated and removed in newer NumPy.
X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    X[i] = ctable.encode(sentence, maxlen=MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, maxlen=DIGITS + 1)

# Shuffle X and y with the same permutation so question/answer rows stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Explicitly set apart the last 10% as validation data that we never train over.
# A single integer split point is shared by X and y so the two splits cannot
# drift apart through floating-point rounding.
split_at = len(X) * 9 // 10

X_train, X_val = X[:split_at], X[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

Vectorization...


In [20]:
# Sanity check: show the shape of each train/validation array, one per line.
print(*(arr.shape for arr in (X_train, y_train, X_val, y_val)), sep='\n')

(45000, 8, 12)
(45000, 4, 12)
(5000, 8, 12)
(5000, 4, 12)


In [21]:

%%time

#Training the model with the encoded inputs
print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
# note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).

model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# For the decoder's input, we repeat the encoded input for each time step

model.add(RepeatVector(DIGITS + 1))

# The decoder RNN could be multiple layers stacked or a single layer
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# For each of step of the output sequence, decide which character should be chosen
model.add(TimeDistributed(Dense(len(chars))))
model.add(Activation('softmax'))

Build model...
CPU times: user 835 ms, sys: 37.6 ms, total: 873 ms
Wall time: 897 ms


In [22]:
%%time

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train the model each generation and show predictions against the validation dataset
for iteration in range(1, 20):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epoch=1,
              validation_data=(X_val, y_val))
    
    score = model.evaluate(X_val, y_val, verbose=0)
    print('\n')
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    print('\n')


--------------------------------------------------
Iteration 1




Train on 45000 samples, validate on 5000 samples
Epoch 1/1


Test score: 1.67726400871
Test accuracy: 0.14805



--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1

KeyboardInterrupt: 