# Adder and Subtractor
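**TODO:** change the data generation so that the operator (`+` or `-`) is picked at random for each sample. (回家記得改一下資料生成的方式，隨機產生 + -)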

In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


In [2]:
class colors:
    """ANSI escape sequences for colouring text printed to a terminal."""

    # Resets the terminal to its default colour; append after coloured text.
    close = '\033[0m'
    # Bright green, used to flag a correct prediction.
    ok = '\033[92m'
    # Bright red, used to flag a wrong prediction.
    fail = '\033[91m'

In [18]:
# ---- Data generation ------------------------------------------------------
# Total number of question/answer pairs to generate.
TRAINING_SIZE = 80000
# Maximum number of digits in each operand.
DIGITS = 3
# When True, each padded question string is reversed before encoding.
REVERSE = False
# Longest question: DIGITS digits, one operator, DIGITS digits.
MAXLEN = 2 * DIGITS + 1
# Alphabet of the task: the ten digits, both operators and the padding space.
chars = '0123456789-+ '

# ---- Model / training -----------------------------------------------------
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
# Number of units in every recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of stacked decoder layers.
LAYERS = 1

In [20]:
class CharacterTable(object):
    """Two-way mapping between characters and one-hot encoded rows.

    Given a set of characters the table can:
      * encode a string into a one-hot matrix, and
      * decode a one-hot (or probability) matrix, or a vector of class
        indices, back into a string.
    """

    def __init__(self, chars):
        """Build the lookup tables.

        Args:
            chars: iterable of characters that may appear in the data.
                Duplicates are dropped and the remainder is sorted, so the
                index of a character is its rank in sorted order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode the string ``C``.

        Example: ``encode('43+7', MAXLEN)`` or ``encode('50', DIGITS + 1)``.

        Args:
            C: string to encode; every character must be in the table.
            num_rows: number of rows in the result. Rows past ``len(C)``
                are left as all zeros.

        Returns:
            Float array of shape ``(num_rows, len(self.chars))``.

        Raises:
            KeyError: if ``C`` contains a character not in the table.
            IndexError: if ``C`` is longer than ``num_rows``.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            one_hot[row, self.char_indices[ch]] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded sequence back into a string.

        Args:
            x: either a matrix with one row per character (one-hot or
                probabilities) or, when ``calc_argmax`` is False, a
                sequence of character indices.
            calc_argmax: when True, take the argmax over the last axis of
                ``x`` to obtain the character indices first.

        Returns:
            The decoded string, one character per row/index.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join([self.indices_char[idx] for idx in indices])

In [21]:
# Shared character table over the task alphabet in `chars`; used to one-hot
# encode questions/answers and to decode predictions back into text.
ctable = CharacterTable(chars)

In [15]:
def random_operand():
    """Return a random non-negative integer with between 1 and DIGITS digits.

    The digit count is drawn first, then each digit is drawn uniformly from
    0-9. Leading zeros collapse under int(), e.g. '007' becomes 7.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789'))
                       for _ in range(n_digits)))


questions = []
expected = []
seen = set()      # operand pairs already used, so that no pair is repeated
print('Generating data...')
itor = 0          # number of accepted samples; its parity selects the operator
while len(questions) < TRAINING_SIZE:
    a, b = random_operand(), random_operand()
    # NOTE(review): the key ignores both the operator and the operand order,
    # so for a given pair only one of a+b, b+a, a-b, b-a is ever generated.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)

    # Accepted samples alternate: even -> addition, odd -> subtraction.
    # Operands are not ordered, so a subtraction result may be negative.
    if itor % 2:
        op, result = '-', a - b
    else:
        op, result = '+', a + b

    q = '{}{}{}'.format(a, op, b)
    query = q.ljust(MAXLEN)              # pad so every input has the same length
    ans = str(result).ljust(DIGITS + 1)  # pad so every answer has the same length
    if REVERSE:
        # [::-1] reverses the string, e.g. "43+7   " becomes "   7+34".
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
    itor += 1

# The count covers additions and subtractions; the message text is kept as-is.
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 80000


In [16]:
# Peek at the first five generated questions and their padded answers.
print(questions[:5], expected[:5])

['61+7   ', '0-2    ', '18+63  ', '173-65 ', '2+30   '] ['68  ', '-2  ', '81  ', '108 ', '32  ']


In [22]:
print('Vectorization...')
# One-hot tensors of shape (samples, time steps, alphabet size).
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    # Each question becomes a MAXLEN x len(chars) matrix (7 x 13 with the
    # current settings), one one-hot row per character.
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    # Each answer becomes a (DIGITS + 1) x len(chars) matrix.
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [23]:
# Shuffle questions and answers with one shared permutation so that each
# question stays aligned with its answer.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold-out split: the first 20000 samples are used for training/validation,
# everything after them (60000 with the current TRAINING_SIZE) for testing.
train_x, test_x = x[:20000], x[20000:]
train_y, test_y = y[:20000], y[20000:]

# The last tenth of the training portion becomes the validation set
# (split_at == 18000 for 20000 training samples).
split_at = len(train_x) - len(train_x) // 10
x_train, y_train = train_x[:split_at], train_y[:split_at]
x_val, y_val = train_x[split_at:], train_y[split_at:]

# Report the shape of every subset: inputs first, then targets.
for title, inputs, targets in (('Training Data:', x_train, y_train),
                               ('Validation Data:', x_val, y_val),
                               ('Testing Data:', test_x, test_y)):
    print(title)
    print(inputs.shape)
    print(targets.shape)

Training Data:
(18000, 7, 13)
(18000, 4, 13)
Validation Data:
(2000, 7, 13)
(2000, 4, 13)
Testing Data:
(60000, 7, 13)
(60000, 4, 13)


In [24]:
print('Build model...')

############################################
##### Build your own model here ############
############################################
n_symbols = len(chars)   # size of the one-hot alphabet
answer_len = DIGITS + 1  # longest answer, e.g. 999+999=1998 when DIGITS=3

model = Sequential()

# Encoder: read the whole question and summarise it as a single vector of
# HIDDEN_SIZE values. (For variable-length inputs one would use
# input_shape=(None, num_feature) instead.)
# An alternative that is not used here: return the encoder's full sequence
# (return_sequences=True) and keep only its last DIGITS + 1 steps with a
# Lambda layer instead of the RepeatVector below.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, n_symbols)))

# Feed that summary vector to the decoder once per output time step.
model.add(layers.RepeatVector(answer_len))

# Decoder: one or more stacked recurrent layers. return_sequences=True makes
# each layer emit an output for every time step, shaped
# (num_samples, timesteps, output_dim), which TimeDistributed below requires.
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Per time step, a softmax over the alphabet picks the output character.
char_classifier = layers.Dense(n_symbols, activation='softmax')
model.add(layers.TimeDistributed(char_classifier))

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 13)             1677      
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


In [27]:
N_ITERATIONS = 200     # training rounds; each round is one epoch
N_PREVIEW = 10         # validation samples spot-checked after each round
# The spot-check was previously disabled by wrapping its print statements in
# a string literal, which still ran (and discarded) every prediction. It is
# now skipped entirely unless this flag is set to True.
SHOW_PREVIEW = False

for iteration in range(N_ITERATIONS):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    if not SHOW_PREVIEW:
        continue

    # Show the model's answer for a few random validation questions.
    for i in range(N_PREVIEW):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Argmax over the softmax axis yields the predicted character index
        # per time step. This replaces Sequential.predict_classes, which no
        # longer exists in recent TensorFlow/Keras releases.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 2
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 3
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 4
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 5
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 6
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 7
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

---------------


--------------------------------------------------
Iteration 33
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 34
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 35
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 36
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 37
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 38
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 39
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 40
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

-------


--------------------------------------------------
Iteration 66
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 67
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 68
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 69
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 70
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 72
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 73
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

-------

Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 100
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 101
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 102
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 103
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 104
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 105
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 106
T


--------------------------------------------------
Iteration 131
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 132
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 133
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 134
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 135
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 136
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 137
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 138
Train on 18000 samples, validate on 2000 samples
Epoch 1/1



--------------------------------------------------
Iteration 164
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 165
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 166
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 167
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 168
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 169
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 170
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 171
Train on 18000 samples, validate on 2000 samples
Epoch 1/1



--------------------------------------------------
Iteration 197
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 198
Train on 18000 samples, validate on 2000 samples
Epoch 1/1

--------------------------------------------------
Iteration 199
Train on 18000 samples, validate on 2000 samples
Epoch 1/1


In [28]:
print("MSG : Prediction")
#####################################################
## Test and evaluate the trained model on the held-out set.
## The arrays hold one-hot encodings of strings such as
##   questions: ["555+175", "860+7  ", "340+29 "]
##   answers:   ["730 ", "867 ", "369 "]
#####################################################
# evaluate() returns the loss followed by the compiled metrics (here 'acc').
# NOTE(review): with a per-time-step softmax, 'acc' is presumably measured
# per output character rather than per whole answer; confirm before quoting.
scores = model.evaluate(test_x, test_y)
loss, acc = scores
print("Loss:", loss)
print("Accuracy:", acc)

MSG : Prediction
Loss: 0.454495231060187
Accuracy: 0.8840083333333333
