In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


In [2]:
class colors:
    """ANSI terminal escape sequences used to colour the pass/fail marks printed during training."""
    # SGR code 92 - rendered as bright green on most terminals; prefixes a correct guess.
    ok = '\033[92m'
    # SGR code 91 - rendered as bright red on most terminals; prefixes a wrong guess.
    fail = '\033[91m'
    # SGR code 0 - resets all attributes so later output is not coloured.
    close = '\033[0m'

In [3]:
# Number of unique question/answer pairs to generate.
TRAINING_SIZE = 80000
# Maximum number of decimal digits in each operand.
DIGITS = 3
# When True, the question string is reversed before being encoded.
REVERSE = False
# Longest possible question: DIGITS for each operand plus one operator character.
MAXLEN = DIGITS + 1 + DIGITS
# Vocabulary: the ten digits, the minus sign, and the space used for padding.
chars = '0123456789- '
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
# Number of units in every recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of stacked decoder recurrent layers.
LAYERS = 1

In [4]:
class CharacterTable(object):
    """Bidirectional mapping between characters and one-hot row vectors.

    The vocabulary is the sorted set of distinct characters passed to the
    constructor; a character's position in that sorted list is its index.
    """

    def __init__(self, chars):
        """Build the sorted vocabulary and both lookup dictionaries."""
        vocabulary = sorted(set(chars))
        self.chars = vocabulary
        self.char_indices = {ch: idx for idx, ch in enumerate(vocabulary)}
        self.indices_char = {idx: ch for idx, ch in enumerate(vocabulary)}

    def encode(self, C, num_rows):
        """One-hot encode the string ``C`` into a ``(num_rows, vocabulary size)`` array.

        Row ``k`` marks the k-th character of ``C``; rows beyond ``len(C)``
        are left as all zeros.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            column = self.char_indices[ch]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn ``x`` back into a string.

        With ``calc_argmax`` true, ``x`` holds one score vector per character
        and the arg-max along the last axis is taken first; otherwise ``x`` is
        already a sequence of vocabulary indices.
        """
        index_seq = x.argmax(axis=-1) if calc_argmax else x
        return "".join(map(self.indices_char.__getitem__, index_seq))

In [5]:
# Shared encoder/decoder between strings and one-hot arrays, built from the vocabulary in `chars`.
ctable = CharacterTable(chars)

In [6]:
# Display the index -> character mapping to confirm the vocabulary ordering.
ctable.indices_char

{0: ' ',
 1: '-',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

In [7]:
# Generate TRAINING_SIZE unique subtraction problems of the form "a-b" with
# a >= b, so every answer is non-negative. Questions are space-padded to
# MAXLEN characters and answers to DIGITS + 1 characters.
# NOTE: the loop only terminates if TRAINING_SIZE does not exceed the number of
# distinct (a, b) pairs that can be drawn.
questions = []
expected = []
seen = set()
DIGIT_CHARS = list('0123456789')


def random_operand():
    """Return a random integer built from 1 to DIGITS random decimal digits."""
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(DIGIT_CHARS) for _ in range(n_digits)))


print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = random_operand(), random_operand()
    # Put the larger operand first so the difference is never negative.
    if b > a:
        a, b = b, a
    # a >= b here, so (b, a) is the sorted pair used to detect duplicates.
    key = (b, a)
    if key in seen:
        continue
    seen.add(key)
    q = '{}-{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a - b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reversing the question includes its padding.
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total subtraction questions:', len(questions))

Generating data...
Total addition questions: 80000


In [8]:
# Show the first five padded questions next to their padded answers.
print(questions[:5], expected[:5])

['980-2  ', '4-1    ', '62-21  ', '75-35  ', '73-2   '] ['978 ', '3   ', '41  ', '40  ', '71  ']


In [9]:
# One-hot encode every question into x (samples, MAXLEN, vocabulary) and every
# answer into y (samples, DIGITS + 1, vocabulary).
print('Vectorization...')
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [10]:
# Shuffle questions and answers with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# The first 20000 shuffled samples form the training pool; the rest is held out for testing.
train_x, test_x = x[:20000], x[20000:]
train_y, test_y = y[:20000], y[20000:]

# The last tenth of the training pool is set aside for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the input and label shapes of each subset.
for title, inputs, labels in (
        ('Training Data:', x_train, y_train),
        ('Validation Data:', x_val, y_val),
        ('Testing Data:', test_x, test_y)):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(18000, 7, 12)
(18000, 4, 12)
Validation Data:
(2000, 7, 12)
(2000, 4, 12)
Testing Data:
(60000, 7, 12)
(60000, 4, 12)


In [11]:
# Print the one-hot arrays of the first three training samples and their labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False  True False False False
   False]
  [False False False  True False False False False False False False
   False]
  [False False False False False False False False False False  True
   False]
  [False  True False False False False False False False False False
   False]
  [False False False  True False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False False False  True False False False False False False
   False]]

 [[False False False False False False False False  True False False
   False]
  [False False False False False False False False False False False
    True]
  [False False False False False False False False  True False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False False False False False False Fal

In [12]:
# Encoder-decoder model: one recurrent layer reads the question, its final
# output is repeated once per answer character, and the decoder layers turn
# that into a per-character distribution over the vocabulary.
print('Build model...')
model = Sequential()
# Encoder: reads the (MAXLEN, vocabulary) one-hot question and returns only its last output.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# Feed the encoder output to the decoder once for each of the DIGITS + 1 answer positions.
model.add(layers.RepeatVector(DIGITS + 1))
# Decoder: LAYERS stacked recurrent layers, each returning its full output sequence.
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply the same softmax classifier over the vocabulary at every answer position.
model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Train in 10 rounds of 10 epochs each. After every round, spot-check the model
# on 10 randomly drawn validation questions and print question (Q), true
# answer (T), a coloured pass/fail mark, and the model's guess.
for iteration in range(10):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=10,
              validation_data=(x_val, y_val))
    for _ in range(10):
        ind = np.random.randint(0, len(x_val))
        # Slice instead of indexing so the leading batch axis (size 1) is kept.
        rowx, rowy = x_val[ind:ind + 1], y_val[ind:ind + 1]
        # `Sequential.predict_classes` no longer exists in current Keras;
        # arg-max over the softmax axis gives the same class indices.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # `preds` already holds vocabulary indices, so skip the arg-max in decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Instructions for updating:
Use tf.cast instead.
Train on 18000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 652-29  T 623  [91m☒[0m 647 
Q 645-84  T 561  [91m☒[0m 565 
Q 715-40  T 675  [91m☒[0m 680 
Q 752-126 T 626  [91m☒[0m 654 
Q 321-83  T 238  [91m☒[0m 293 
Q 166-20  T 146  [91m☒[0m 130 
Q 326-5   T 321  [91m☒[0m 213 
Q 859-712 T 147  [91m☒[0m 45  
Q 514-124 T 390  [91m☒[0m 492 
Q 809-526 T 283  [91m☒[0m 102 

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 738-269 T 469  [91m☒[0m 464 
Q 968-485 T 483  [91m☒[0m 416 
Q 394-96  T 298  [91m☒[0m 200 
Q 359-76  T 283  [91m☒[0m 280 
Q 846-838 T 8    [91m☒[0m 1   
Q 400-84 

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 933-933 T 0    [91m☒[0m 10  
Q 951-592 T 359  [91m☒[0m 360 
Q 387-78  T 309  [91m☒[0m 300 
Q 879-76  T 803  [91m☒[0m 802 
Q 509-70  T 439  [91m☒[0m 449 
Q 864-46  T 818  [92m☑[0m 818 
Q 227-38  T 189  [91m☒[0m 188 
Q 519-66  T 453  [92m☑[0m 453 
Q 632-45  T 587  [92m☑[0m 587 
Q 419-50  T 369  [91m☒[0m 379 

--------------------------------------------------
Iteration 5
Train on 18000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 830-4   T 826  [92m☑[0m 826 
Q 593-239 T 354  [92m☑[0m 354 
Q 952-76  T 876  [92m☑[0m 876 
Q 624-89  T 535  [92m☑[0m 535 
Q 629-309 T 320  [91m☒[0m 310 
Q 34-18   T 16   [92m☑[0m 16  
Q 700-478 T 222  [91m☒[0m 223 
Q 30-16   T 14   [91m☒[0m 13  
Q 344-67  T 277  [92m☑[0m 277 
Q 854-619 T 235  [91m☒[0m 234 

------------------------------------------------

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 978-621 T 357  [92m☑[0m 357 
Q 696-602 T 94   [91m☒[0m 9   
Q 891-27  T 864  [92m☑[0m 864 
Q 737-463 T 274  [92m☑[0m 274 
Q 416-32  T 384  [92m☑[0m 384 
Q 566-52  T 514  [92m☑[0m 514 
Q 770-118 T 652  [92m☑[0m 652 
Q 201-8   T 193  [92m☑[0m 193 
Q 985-74  T 911  [92m☑[0m 911 
Q 207-87  T 120  [91m☒[0m 110 


In [14]:
# Exact-match accuracy on the first N_EVAL held-out test samples (the data was
# shuffled before splitting, so this is already a random subset). An answer
# counts as right only if every one of its characters is predicted correctly.
N_EVAL = min(10000, len(test_x))
eval_x, eval_y = test_x[:N_EVAL], test_y[:N_EVAL]

# One batched prediction replaces N_EVAL single-row calls; arg-max over the
# softmax axis stands in for the removed `Sequential.predict_classes`.
preds = model.predict(eval_x, verbose=0).argmax(axis=-1)
truth = eval_y.argmax(axis=-1)

# Comparing vocabulary indices is equivalent to comparing the decoded strings,
# because each index maps to exactly one character.
matches = (preds == truth).all(axis=1)
Right = int(matches.sum())
Wrong = N_EVAL - Right
print(Right / N_EVAL)

0.7995
