In [2]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


# Parameter Configuration

In [3]:
class colors:
    """ANSI escape sequences used to colour the per-sample result marks."""
    # ok: wraps a correct-prediction mark, fail: wraps a wrong one, close: resets styling.
    ok, fail, close = '\033[92m', '\033[91m', '\033[0m'

In [4]:
# Number of unique addition questions to generate.
TRAINING_SIZE = 80000
# Maximum number of decimal digits in each operand.
DIGITS = 3
# When True, each question string is reversed before it is stored.
REVERSE = False
# Longest possible question: DIGITS digits, one '+', DIGITS digits.
MAXLEN = DIGITS + 1 + DIGITS
# Vocabulary: the ten digits, the plus sign, and the space used for padding.
chars = '0123456789+ '
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
# Number of units in every recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of stacked recurrent layers in the decoder.
LAYERS = 1

In [5]:
class CharacterTable(object):
    """Two-way mapping between characters and one-hot rows.

    The vocabulary is the sorted set of the characters given at construction;
    a character's position in that sorted list is its one-hot column index.
    """

    def __init__(self, chars):
        """Build the sorted vocabulary and both lookup dictionaries."""
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode string ``C`` into a ``(num_rows, len(self.chars))`` array.

        Rows beyond ``len(C)`` stay all-zero. Typical calls are
        ``encode('43+7', MAXLEN)`` for a question and
        ``encode('50', DIGITS + 1)`` for an answer.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            column = self.char_indices[ch]
            onehot[row, column] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Convert one-hot/probability rows, or class indices, back to a string.

        With ``calc_argmax=True`` every row of ``x`` is first reduced to the
        index of its maximum; otherwise ``x`` is taken as a sequence of indices.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join([self.indices_char[i] for i in indices])

In [6]:
# Shared encode/decode table built from the vocabulary string `chars`.
ctable = CharacterTable(chars)

In [7]:
# Inspect the index -> character lookup.
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

In [8]:
# Inspect the sorted vocabulary list.
ctable.chars

[' ', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [9]:
# Inspect the character -> index lookup.
ctable.char_indices

{' ': 0,
 '+': 1,
 '0': 2,
 '1': 3,
 '2': 4,
 '3': 5,
 '4': 6,
 '5': 7,
 '6': 8,
 '7': 9,
 '8': 10,
 '9': 11}

# Data Generation

In [10]:
def _random_operand():
    """Return a random non-negative integer made of 1 to DIGITS random decimal digits."""
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


questions = []
expected = []
seen = set()      # operand pairs already used, so no question is generated twice
print('Generating data...')

# Operands range over 0 .. 10**DIGITS - 1 and pairs are de-duplicated regardless of
# order, so only this many distinct questions exist. Asking for more would make the
# loop below spin forever.
_n_values = 10 ** DIGITS
_max_unique = _n_values * (_n_values + 1) // 2
if TRAINING_SIZE > _max_unique:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} distinct operand pairs available for DIGITS={}'
        .format(TRAINING_SIZE, _max_unique, DIGITS))

while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # The key is order-independent, so "a+b" and "b+a" count as the same question.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # The operands are written in the order drawn (not sorted smaller-first).
    q = '{}+{}'.format(a, b)
    query = q.ljust(MAXLEN)               # pad with spaces so every input has the same length
    ans = str(a + b).ljust(DIGITS + 1)    # pad the answer to the fixed output length
    if REVERSE:
        # [::-1] reverses the string, e.g. "43+7   " becomes "   7+34".
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 80000


In [11]:
# Preview the first five padded questions and their padded answers.
print(questions[:5], expected[:5])

['390+58 ', '77+25  ', '26+6   ', '3+12   ', '7+230  '] ['448 ', '102 ', '32  ', '15  ', '237 ']


# Processing

In [12]:
print('Vectorization...')
# One-hot tensors: x is (n_questions, MAXLEN, vocab) and y is (n_answers, DIGITS + 1, vocab).
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    # Each question becomes one MAXLEN x vocab one-hot matrix (7 x 12 with the settings above).
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [13]:
# Sanity check: re-encode the first question by hand and compare it with x[0].
print("question[0]:",questions[0])
print("x[0][1]:    ",x[0][1])
print("x.shape:    ",x.shape)
print("x[0].shape: ",x[0].shape)
# Builtin `bool` replaces the `np.bool` alias, which was removed in NumPy 1.24.
ggg = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
# Only the first question is inspected, so iterate over a one-element slice
# instead of enumerating everything and breaking out at index 1.
for i, sentence in enumerate(questions[:1]):
    print("i:",i," sentence:",sentence)
    ggg[0] = ctable.encode(sentence, MAXLEN)
    print(ggg[0])
print(x[0])

question[0]: 390+58 
x[0][1]:     [False False False False False False False False False False False  True]
x.shape:     (80000, 7, 12)
x[0].shape:  (7, 12)
i: 0  sentence: 390+58 
[[False False False False False  True False False False False False False]
 [False False False False False False False False False False False  True]
 [False False  True False False False False False False False False False]
 [False  True False False False False False False False False False False]
 [False False False False False False False  True False False False False]
 [False False False False False False False False False False  True False]
 [ True False False False False False False False False False False False]]
[[False False False False False  True False False False False False False]
 [False False False False False False False False False False False  True]
 [False False  True False False False False False False False False False]
 [False  True False False False False False False False False False 

In [14]:
# Demonstration of the shuffle idiom: build an index array 0..len(y)-1, then let
# np.random.shuffle permute it in place (printed before and after).
ya = np.arange(len(y))
print(ya)
np.random.shuffle(ya)
print(ya)

[    0     1     2 ... 79997 79998 79999]
[26296  7172 59081 ... 14502 43188 54901]


In [15]:
# NOTE(review): `test_x` is only assigned by the train/test split cell further down, so
# this cell raises NameError on a fresh kernel (Restart & Run All). Move it below that
# cell or remove it.
len(test_x)

NameError: name 'test_x' is not defined

In [18]:
# Shuffle inputs and labels with one shared permutation so each pair stays aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Train/test split: the first N_TRAIN shuffled samples form the training pool,
# everything after them is held out for testing (20000 / 60000 with 80000 samples).
N_TRAIN = 20000
train_x, test_x = x[:N_TRAIN], x[N_TRAIN:]
train_y, test_y = y[:N_TRAIN], y[N_TRAIN:]

# The last tenth of the training pool becomes the validation set (split_at = 18000 here).
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of every input/label pair.
for title, inputs, labels in (
    ('Training Data:', x_train, y_train),
    ('Validation Data:', x_val, y_val),
    ('Testing Data:', test_x, test_y),
):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(18000, 7, 12)
(18000, 4, 12)
Validation Data:
(2000, 7, 12)
(2000, 4, 12)
Testing Data:
(60000, 7, 12)
(60000, 4, 12)


In [17]:
# Show the raw one-hot tensors of the first three training inputs and labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False False False False
    True]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False False  True
   False]
  [False False False False False False False False False  True False
   False]
  [False False  True False False False False False False False False
   False]
  [ True False False False False False False False False False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False False  True False False False False False
   False]
  [False False  True False False False False False False False False
   False]
  [False False  True False False False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False  True False False False False False False False
   False]
  [False False False  True False False Fal

# Build Model

In [49]:
print('Build model...')

############################################
##### Build your own model here ############
############################################
# Encoder-decoder sequence model: one recurrent encoder, a repeated context
# vector, LAYERS recurrent decoder layers, and a per-timestep softmax.
model = Sequential()
# Encoder: read the whole (MAXLEN, len(chars)) one-hot question and emit a single
# HIDDEN_SIZE vector.
# Note: for variable-length input sequences use input_shape=(None, num_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# Feed the encoder's last output to the decoder at every output time step.
# It is repeated DIGITS + 1 times because that is the maximum answer length,
# e.g. with DIGITS=3 the largest sum is 999+999=1998.
model.add(layers.RepeatVector(DIGITS + 1))
# Decoder: LAYERS stacked recurrent layers.
for _ in range(LAYERS):
    # return_sequences=True makes the layer return the output of every time step,
    # shaped (num_samples, timesteps, output_dim), which TimeDistributed below needs.
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply the same dense softmax to every time step to pick one character per
# output position.
model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_27 (LSTM)               (None, 128)               72192     
_________________________________________________________________
repeat_vector_5 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_28 (LSTM)               (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_10 (TimeDis (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


# Training

In [50]:
# Train one epoch per iteration so that sample predictions can be inspected
# after every epoch.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Spot-check ten randomly chosen validation questions.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` was removed in TensorFlow 2.6. Taking the
        # arg-max over the softmax output gives the same class indices and works
        # on both old and new Keras versions.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # `preds` already holds class indices, so skip the arg-max inside decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 962+95  T 1057 [91m☒[0m 118 
Q 939+48  T 987  [91m☒[0m 110 
Q 442+72  T 514  [91m☒[0m 128 
Q 976+51  T 1027 [91m☒[0m 118 
Q 749+93  T 842  [91m☒[0m 110 
Q 345+7   T 352  [91m☒[0m 12  
Q 51+884  T 935  [91m☒[0m 118 
Q 888+495 T 1383 [91m☒[0m 1100
Q 459+55  T 514  [91m☒[0m 118 
Q 929+961 T 1890 [91m☒[0m 1101

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 420+39  T 459  [91m☒[0m 122 
Q 202+793 T 995  [91m☒[0m 100 
Q 391+8   T 399  [91m☒[0m 122 
Q 50+128  T 178  [91m☒[0m 122 
Q 817+23  T 840  [91m☒[0m 122 
Q 172+62  T 234  [91m☒[0m 122 
Q 91+628  T 719  [91m☒[0m 101 
Q 309+28  T 337  [91m☒[0m 102 
Q 44+405  T 449  [91m☒[0m 122 
Q 455+57  T 512  [91m☒[0m 102 

--------------------------------------------------
Iteration 2
Train on 18000 samples, valida

Q 99+151  T 250  [91m☒[0m 200 
Q 720+297 T 1017 [91m☒[0m 100 
Q 71+940  T 1011 [91m☒[0m 1017
Q 648+697 T 1345 [91m☒[0m 1450
Q 311+202 T 513  [91m☒[0m 435 
Q 94+517  T 611  [91m☒[0m 655 
Q 716+9   T 725  [91m☒[0m 768 
Q 668+27  T 695  [91m☒[0m 785 
Q 475+79  T 554  [91m☒[0m 555 
Q 263+82  T 345  [91m☒[0m 355 

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 11+810  T 821  [91m☒[0m 833 
Q 8+582   T 590  [91m☒[0m 599 
Q 3+707   T 710  [91m☒[0m 714 
Q 647+14  T 661  [91m☒[0m 688 
Q 904+853 T 1757 [91m☒[0m 1727
Q 448+92  T 540  [91m☒[0m 522 
Q 42+232  T 274  [91m☒[0m 357 
Q 840+812 T 1652 [91m☒[0m 1667
Q 409+709 T 1118 [91m☒[0m 1077
Q 802+35  T 837  [91m☒[0m 845 

--------------------------------------------------
Iteration 16
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 227+392 T 619  [91m☒[0m 525 
Q 289+781 T 1070 [91m☒[0m 1076
Q 80+430  T 510  [91m☒[0

Q 48+973  T 1021 [91m☒[0m 1022
Q 639+991 T 1630 [92m☑[0m 1630
Q 995+791 T 1786 [91m☒[0m 1877
Q 93+897  T 990  [91m☒[0m 981 
Q 341+458 T 799  [91m☒[0m 707 
Q 423+513 T 936  [91m☒[0m 934 
Q 4+72    T 76   [91m☒[0m 77  
Q 578+8   T 586  [92m☑[0m 586 
Q 550+948 T 1498 [91m☒[0m 1487
Q 637+646 T 1283 [91m☒[0m 1282

--------------------------------------------------
Iteration 29
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 72+296  T 368  [91m☒[0m 369 
Q 599+226 T 825  [92m☑[0m 825 
Q 5+652   T 657  [92m☑[0m 657 
Q 1+642   T 643  [91m☒[0m 642 
Q 79+3    T 82   [91m☒[0m 90  
Q 417+14  T 431  [91m☒[0m 420 
Q 142+669 T 811  [91m☒[0m 722 
Q 312+64  T 376  [91m☒[0m 367 
Q 50+289  T 339  [91m☒[0m 359 
Q 886+111 T 997  [92m☑[0m 997 

--------------------------------------------------
Iteration 30
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 187+573 T 760  [91m☒[0m 750 
Q 7+221   T 228  [91m☒[0m 227 
Q 50+186  T 236  [91m☒[0

Q 23+627  T 650  [92m☑[0m 650 
Q 96+320  T 416  [92m☑[0m 416 
Q 67+307  T 374  [92m☑[0m 374 
Q 485+53  T 538  [92m☑[0m 538 
Q 0+888   T 888  [91m☒[0m 898 
Q 727+74  T 801  [91m☒[0m 701 
Q 6+986   T 992  [91m☒[0m 991 
Q 686+371 T 1057 [92m☑[0m 1057
Q 466+74  T 540  [91m☒[0m 530 
Q 98+396  T 494  [91m☒[0m 493 

--------------------------------------------------
Iteration 43
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 45+702  T 747  [92m☑[0m 747 
Q 322+537 T 859  [91m☒[0m 869 
Q 317+89  T 406  [92m☑[0m 406 
Q 344+354 T 698  [91m☒[0m 798 
Q 47+48   T 95   [92m☑[0m 95  
Q 436+11  T 447  [91m☒[0m 448 
Q 768+404 T 1172 [92m☑[0m 1172
Q 82+89   T 171  [92m☑[0m 171 
Q 287+394 T 681  [91m☒[0m 771 
Q 545+674 T 1219 [92m☑[0m 1219

--------------------------------------------------
Iteration 44
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 455+134 T 589  [92m☑[0m 589 
Q 87+267  T 354  [92m☑[0m 354 
Q 599+226 T 825  [91m☒[0

Q 138+80  T 218  [92m☑[0m 218 
Q 502+20  T 522  [91m☒[0m 512 
Q 752+89  T 841  [92m☑[0m 841 
Q 79+208  T 287  [92m☑[0m 287 
Q 659+312 T 971  [92m☑[0m 971 
Q 1+277   T 278  [92m☑[0m 278 
Q 436+11  T 447  [92m☑[0m 447 
Q 621+27  T 648  [92m☑[0m 648 
Q 86+561  T 647  [92m☑[0m 647 
Q 0+908   T 908  [92m☑[0m 908 

--------------------------------------------------
Iteration 57
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 27+561  T 588  [92m☑[0m 588 
Q 80+92   T 172  [92m☑[0m 172 
Q 49+867  T 916  [92m☑[0m 916 
Q 73+489  T 562  [92m☑[0m 562 
Q 83+952  T 1035 [92m☑[0m 1035
Q 383+365 T 748  [91m☒[0m 758 
Q 801+674 T 1475 [92m☑[0m 1475
Q 656+343 T 999  [92m☑[0m 999 
Q 944+97  T 1041 [91m☒[0m 1031
Q 791+602 T 1393 [91m☒[0m 1493

--------------------------------------------------
Iteration 58
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 88+283  T 371  [92m☑[0m 371 
Q 236+971 T 1207 [92m☑[0m 1207
Q 98+719  T 817  [92m☑[0

Q 876+518 T 1394 [92m☑[0m 1394
Q 292+6   T 298  [92m☑[0m 298 
Q 57+14   T 71   [91m☒[0m 61  
Q 658+14  T 672  [92m☑[0m 672 
Q 456+670 T 1126 [92m☑[0m 1126
Q 27+148  T 175  [92m☑[0m 175 
Q 2+846   T 848  [92m☑[0m 848 
Q 631+76  T 707  [92m☑[0m 707 
Q 23+352  T 375  [92m☑[0m 375 
Q 228+305 T 533  [92m☑[0m 533 

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 789+72  T 861  [92m☑[0m 861 
Q 455+134 T 589  [92m☑[0m 589 
Q 344+354 T 698  [91m☒[0m 798 
Q 22+175  T 197  [92m☑[0m 197 
Q 631+950 T 1581 [92m☑[0m 1581
Q 59+567  T 626  [92m☑[0m 626 
Q 546+30  T 576  [92m☑[0m 576 
Q 863+216 T 1079 [92m☑[0m 1079
Q 486+677 T 1163 [92m☑[0m 1163
Q 14+730  T 744  [92m☑[0m 744 

--------------------------------------------------
Iteration 72
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 682+48  T 730  [92m☑[0m 730 
Q 282+296 T 578  [92m☑[0m 578 
Q 830+426 T 1256 [92m☑[0

Q 5+114   T 119  [92m☑[0m 119 
Q 84+19   T 103  [92m☑[0m 103 
Q 13+915  T 928  [92m☑[0m 928 
Q 538+723 T 1261 [92m☑[0m 1261
Q 50+128  T 178  [92m☑[0m 178 
Q 48+207  T 255  [92m☑[0m 255 
Q 233+68  T 301  [92m☑[0m 301 
Q 656+343 T 999  [92m☑[0m 999 
Q 295+140 T 435  [91m☒[0m 425 
Q 899+91  T 990  [91m☒[0m 980 

--------------------------------------------------
Iteration 85
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 63+724  T 787  [92m☑[0m 787 
Q 289+82  T 371  [92m☑[0m 371 
Q 171+71  T 242  [92m☑[0m 242 
Q 62+737  T 799  [92m☑[0m 799 
Q 23+352  T 375  [92m☑[0m 375 
Q 300+428 T 728  [92m☑[0m 728 
Q 147+842 T 989  [92m☑[0m 989 
Q 87+267  T 354  [92m☑[0m 354 
Q 19+528  T 547  [92m☑[0m 547 
Q 681+16  T 697  [92m☑[0m 697 

--------------------------------------------------
Iteration 86
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 538+94  T 632  [92m☑[0m 632 
Q 748+673 T 1421 [92m☑[0m 1421
Q 808+97  T 905  [91m☒[0

Q 96+607  T 703  [92m☑[0m 703 
Q 162+44  T 206  [92m☑[0m 206 
Q 1+430   T 431  [92m☑[0m 431 
Q 72+827  T 899  [92m☑[0m 899 
Q 81+865  T 946  [92m☑[0m 946 
Q 860+16  T 876  [92m☑[0m 876 
Q 331+874 T 1205 [92m☑[0m 1205
Q 222+833 T 1055 [92m☑[0m 1055
Q 616+73  T 689  [92m☑[0m 689 
Q 530+656 T 1186 [92m☑[0m 1186

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 949+7   T 956  [92m☑[0m 956 
Q 940+1   T 941  [92m☑[0m 941 
Q 10+69   T 79   [92m☑[0m 79  
Q 138+2   T 140  [92m☑[0m 140 
Q 923+53  T 976  [92m☑[0m 976 
Q 34+319  T 353  [92m☑[0m 353 
Q 827+51  T 878  [92m☑[0m 878 
Q 51+170  T 221  [92m☑[0m 221 
Q 208+141 T 349  [91m☒[0m 359 
Q 607+677 T 1284 [92m☑[0m 1284


# Testing

In [52]:
print("MSG : Prediction")
#####################################################
## Try to test and evaluate your model ##############
## ex. test_x = ["555+175", "860+7  ", "340+29 "]
## ex. test_y = ["730 ", "867 ", "369 "] 
#####################################################
# Evaluate on the held-out one-hot test tensors. The model was compiled with
# metrics=['acc'], so evaluate returns the loss followed by the accuracy.
loss, acc = model.evaluate(test_x,test_y)
print("Loss:",loss)
print("Accuracy:",acc)

MSG : Prediction
Loss: 0.05882277712672949
Accuracy: 0.979375


In [47]:
# Names of the values returned by model.evaluate, in order.
# NOTE(review): this cell's execution count is lower than the preceding cell's, so it
# was run out of order — re-run the notebook top to bottom before sharing.
model.metrics_names

['loss', 'acc']