In [3]:
%env KERAS_BACKEND = tensorflow
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense,Activation
from keras.optimizers import SGD

from keras.datasets import mnist
from keras.utils import np_utils

env: KERAS_BACKEND=tensorflow


Using TensorFlow backend.


In [4]:
# Load the MNIST handwritten-digit dataset as (images, labels) pairs
# for the training split and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [5]:
# Report the number of samples and the per-image size of each split.
# Each `.shape` is a 3-tuple (count, height, width), so it can feed the
# three %d placeholders directly.
print("總共有%d訓練資料，每筆資料尺寸為 %d x %d" % x_train.shape)
# Fix: this line describes the test split, so label it 測試資料 (test data)
# rather than repeating 訓練資料 (training data).
print("總共有%d測試資料，每筆資料尺寸為 %d x %d" % x_test.shape)

總共有60000訓練資料，每筆資料尺寸為 28 x 28
總共有10000訓練資料，每筆資料尺寸為 28 x 28


In [6]:
# Flatten every 28 x 28 image into a 784-element vector so it matches the
# Dense(input_dim=784) input layer used below.
# -1 lets NumPy infer the number of samples instead of hard-coding
# 60000 / 10000, so the cell keeps working on a subset of the data and
# is still safe to re-run on already-flattened arrays.
x_train = x_train.reshape(-1, 784)
x_test = x_test.reshape(-1, 784)

In [7]:
# Keep only the images whose label is <= 1, i.e. the digits 0 and 1.
x_train_01, x_test_01 = x_train[y_train <= 1], x_test[y_test <= 1]

In [8]:
# One-hot encode the labels.
# Full 10-class labels (digits 0-9):
y_train_10 = np_utils.to_categorical(y_train, 10)
y_test_10 = np_utils.to_categorical(y_test, 10)

# 2-class labels for the 0/1 subset; the `<= 1` filter is the same one
# used to select the matching images.
y_train_01 = np_utils.to_categorical(y_train[y_train <= 1], 2)
y_test_01 = np_utils.to_categorical(y_test[y_test <= 1], 2)

In [9]:
# Sanity check: sizes of the 0/1 training and test image subsets.
(x_train_01.shape, x_test_01.shape)

((12665, 784), (2115, 784))

In [10]:
# Sanity check: the one-hot label arrays must have one row per selected image.
(y_train_01.shape, y_test_01.shape)

((12665, 2), (2115, 2))

In [11]:
# Build the network: start from an empty Sequential model and attach the
# layers one by one with add().
model = Sequential()
for layer in (
    # First hidden layer: 784 inputs -> 500 sigmoid units
    Dense(500, input_dim=784),
    Activation('sigmoid'),
    # Second hidden layer: 500 sigmoid units
    Dense(500),
    Activation('sigmoid'),
    # Output layer: 10 classes with softmax as the activation
    Dense(10),
    Activation('softmax'),
):
    model.add(layer)

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_1 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 500)               250500    
_________________________________________________________________
activation_2 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5010      
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 648,010
Trainable params: 648,010
Non-trainable params: 0

In [12]:
# Inspect model.layers: the list of layer objects that make up the network.
model.layers

[<keras.layers.core.Dense at 0x289358a6550>,
 <keras.layers.core.Activation at 0x289358a6860>,
 <keras.layers.core.Dense at 0x289358a6cc0>,
 <keras.layers.core.Activation at 0x289358a6f98>,
 <keras.layers.core.Dense at 0x28939ac2fd0>,
 <keras.layers.core.Activation at 0x28939ac2198>]

In [13]:
# Alternative construction: describe the network as plain Python lists of
# layers, one list per logical stage.
first_layer = [
    Dense(500, input_dim=784),
    Activation('sigmoid'),
]
second_layer = [
    Dense(500),
    Activation('sigmoid'),
]
output_layer = [
    Dense(10),
    Activation('softmax'),
]

In [14]:
# Concatenate the per-stage lists with + and pass the combined list of
# layers straight to Sequential.
model = Sequential(first_layer + second_layer + output_layer)

In [15]:
# Print the architecture of the list-built model for comparison with the
# model built with add() earlier.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_4 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 500)               250500    
_________________________________________________________________
activation_5 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                5010      
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 648,010
Trainable params: 648,010
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Scenario: we already have a good handwritten-digit model, and now want a
# model that only tells 0 from 1. We want to reuse every layer except the
# last one, so the reusable layers are kept in their own list.
all_expect_last = [
    Dense(500, input_dim=784),
    Activation('sigmoid'),
    Dense(500),
    Activation('sigmoid'),
]

# Original 10-class output head. It is kept here so the architecture
# matches the saved 10-class weights that are loaded next.
output_layer = [
    Dense(10),
    Activation('softmax'),
]

model_0_to_1 = Sequential(all_expect_last + output_layer)
model_0_to_1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_7 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 500)               250500    
_________________________________________________________________
activation_8 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                5010      
_________________________________________________________________
activation_9 (Activation)    (None, 10)                0         
Total params: 648,010
Trainable params: 648,010
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Load the weights of the already-trained network from disk.
# The file path is relative to the notebook's working directory.
model_0_to_1.load_weights('handwriting_model_weights.h5')

In [18]:
# Define a new 2-class output head. The hidden layers in `all_expect_last`
# are the very same layer objects used by the model above, so the new
# model reuses them rather than creating fresh ones.
new_output_layer = [
    Dense(2),
    Activation('softmax'),
]

model_0_to_1 = Sequential(all_expect_last + new_output_layer)
model_0_to_1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_7 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 500)               250500    
_________________________________________________________________
activation_8 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 2)                 1002      
_________________________________________________________________
activation_10 (Activation)   (None, 2)                 0         
Total params: 644,002
Trainable params: 644,002
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Freeze the reused layers so that training only updates the new output
# head. This is done before the model is compiled.
for frozen_layer in all_expect_last:
    frozen_layer.trainable = False

In [20]:
# Print the summary again to confirm the freeze: only the new output
# head's parameters should be listed as trainable.
model_0_to_1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_7 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 500)               250500    
_________________________________________________________________
activation_8 (Activation)    (None, 500)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 2)                 1002      
_________________________________________________________________
activation_10 (Activation)   (None, 2)                 0         
Total params: 644,002
Trainable params: 1,002
Non-trainable params: 643,000
_________________________________________________________________


In [21]:
# Compile the model: mean-squared-error loss, plain SGD with a learning
# rate of 0.1, and accuracy reported as a metric.
# NOTE(review): 'categorical_crossentropy' is the more usual loss for a
# softmax classifier; 'mse' is kept to preserve the recorded results.
model_0_to_1.compile(
    loss='mse',
    optimizer=SGD(lr=0.1),
    metrics=['accuracy'],
)

In [22]:
# Final check before training: inputs and one-hot labels must line up.
(x_train_01.shape, y_train_01.shape)

((12665, 784), (12665, 2))

In [23]:
# Train the network. batch_size is the number of samples per gradient
# update; epochs is the number of full passes over the training data.
model_0_to_1.fit(
    x_train_01,
    y_train_01,
    batch_size=100,
    epochs=5,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x28939cc8c18>

In [24]:
# Evaluate on the held-out 0/1 test subset. `score` holds the loss first,
# followed by the accuracy metric requested at compile time.
score = model_0_to_1.evaluate(x_test_01, y_test_01)



In [25]:
# Show the test-set loss (score[0]) and accuracy (score[1]).
print('測試資料的loss', score[0])
print('測試資料的正確率', score[1])

測試資料的loss 0.001418503502613081
測試資料的正確率 0.9985815602836879
