In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
import tensorflow as tf

# 讀入MNIST數據

In [3]:
from tensorflow.keras.datasets import mnist

# load_data() yields ((train images, train labels), (test images, test labels));
# flatten the two pairs and bind the four arrays in that order.
x_train, y_train, x_test, y_test = (
    array for split in mnist.load_data() for array in split
)

# 資料前處理

In [4]:
# Inspect the raw training array before reshaping; the recorded output is
# (60000, 28, 28), i.e. there is no channel axis yet.
x_train.shape

(60000, 28, 28)

In [5]:
# Append a trailing channel axis so each sample matches the first Conv2D's
# input_shape=(28, 28, 1), then divide by 255 (presumably 8-bit pixel values).
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000,
# so the cell keeps working if the data is subsampled.
x_train = x_train.reshape(-1, 28, 28, 1)/255
x_test = x_test.reshape(-1, 28, 28, 1)/255

from tensorflow.keras.utils import to_categorical

# One-hot encode the labels into 10 classes to match the 10-unit softmax output.
# NOTE: this cell overwrites its own inputs, so it must be run exactly once per
# data load (a second run would rescale x and re-encode y).
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# 打造神經網路模型

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD

In [7]:
# Convolutional feature extractor, kept in its own list so the very same layer
# objects can be reused by a second model later in the notebook.
# Shapes in the comments are taken from the model.summary() output below.
CNN_layers = [
    # Stage 1: (28, 28, 1) -> (28, 28, 10) -> (14, 14, 10)
    Conv2D(filters=10, kernel_size=(5, 5), padding='same', activation='relu',
           input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    # Stage 2: -> (14, 14, 20) -> (7, 7, 20)
    Conv2D(filters=20, kernel_size=(5, 5), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    # Stage 3: -> (7, 7, 30) -> (3, 3, 30)
    Conv2D(filters=30, kernel_size=(5, 5), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    # Flatten to a 270-element vector for the dense head.
    Flatten(),
]

In [8]:
# Dense classification head for the first model: one hidden layer followed by
# a 10-way softmax (one unit per one-hot class).
FC_layers = [
    Dense(units=36, activation='relu'),
    Dense(units=10, activation='softmax'),
]

In [9]:
# Stack the convolutional layers and the dense head into one sequential model.
model = Sequential([*CNN_layers, *FC_layers])

In [10]:
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and is rejected by newer Keras releases.
# NOTE(review): MSE on a softmax output is unusual for classification
# (categorical cross-entropy is the conventional loss); kept unchanged so the
# recorded results stay comparable.
model.compile(loss='mse', optimizer=SGD(learning_rate=0.056), metrics=['accuracy'])

In [11]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 10)        260       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 20)        5020      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 20)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 30)          15030     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 30)          0         
_________________________________________________________________
flatten (Flatten)            (None, 270)               0

In [12]:
# Train the first model on the preprocessed MNIST training split.
model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=256,
)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x162bfbd6988>

In [13]:
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; index 1 is therefore the accuracy, reported in percent.
score_train = model.evaluate(x=x_train, y=y_train)
score_test = model.evaluate(x=x_test, y=y_test)

print(f"model訓練資料的正確率: {score_train[1]*100}")
print(f"model測試資料的正確率: {score_test[1]*100}")

model訓練資料的正確率: 81.15833401679993
model測試資料的正確率: 81.36000037193298


# 下面開始做遷移學習

# 讀入 Fashion MNIST 數據集

In [14]:
from tensorflow.keras.datasets import fashion_mnist

# Replace the MNIST arrays with Fashion-MNIST; load_data() yields
# ((train images, train labels), (test images, test labels)).
x_train, y_train, x_test, y_test = (
    array for split in fashion_mnist.load_data() for array in split
)

# Human-readable class names, listed in label order 0..9.
class_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

# 資料前處理

In [15]:
# Same preprocessing as for MNIST: append a channel axis to match the CNN's
# input_shape=(28, 28, 1) and divide by 255 (presumably 8-bit pixel values).
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000.
x_train = x_train.reshape(-1, 28, 28, 1)/255
x_test = x_test.reshape(-1, 28, 28, 1)/255

# One-hot encode the labels into 10 classes (to_categorical is imported in an
# earlier cell). NOTE: run once per data load; the cell overwrites its inputs.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [16]:
# New, larger dense head for the Fashion-MNIST task: two hidden layers and a
# 10-way softmax. These layers are freshly created, unlike the reused CNN_layers.
FC_layers2 = [
    Dense(units=256, activation='relu'),
    Dense(units=128, activation='relu'),
    Dense(units=10, activation='softmax'),
]

In [17]:
# Second model: the same CNN_layers objects used by `model` (so their weights
# are shared) followed by the new dense head.
model2 = Sequential([*CNN_layers, *FC_layers2])

In [18]:
# Print the architecture of the second model before any layer is frozen.
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 10)        260       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 20)        5020      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 20)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 30)          15030     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 30)          0         
_________________________________________________________________
flatten (Flatten)            (None, 270)              

# Frozen:當新資料集的樣本數不夠多，凍結借來的部分，只針對新建立的神經網路層訓練。

In [19]:
# Mark every reused convolutional-stack layer as non-trainable so that only the
# new dense head is meant to be updated during the next training run.
for borrowed_layer in CNN_layers:
    borrowed_layer.trainable = False

In [20]:
# Print the architecture again after the CNN layers were marked non-trainable.
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 10)        260       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 20)        5020      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 20)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 30)          15030     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 30)          0         
_________________________________________________________________
flatten (Flatten)            (None, 270)              

In [21]:
# Compile after the trainable flags were set to False so the frozen state is
# the one used for training.
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and is rejected by newer Keras releases.
model2.compile(loss='mse', optimizer=SGD(learning_rate=0.056), metrics=['accuracy'])

In [22]:
# Train on Fashion-MNIST with the convolutional stack frozen.
model2.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=256,
)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x162c2515c08>

In [23]:
# evaluate() returns [loss, accuracy] (the model is compiled with
# metrics=['accuracy']); report the accuracy of the frozen-CNN run in percent.
score_train = model2.evaluate(x=x_train, y=y_train)
score_test = model2.evaluate(x=x_test, y=y_test)

print(f"model2訓練資料的正確率: {score_train[1]*100}")
print(f"model2測試資料的正確率: {score_test[1]*100}")

model2訓練資料的正確率: 73.15999865531921
model2測試資料的正確率: 72.2599983215332


# Fine-tune:新資料集的樣本數夠多，整個模型重新訓練。

In [24]:
# Re-enable training on every reused convolutional-stack layer so the whole
# network can be fine-tuned on the new dataset.
for borrowed_layer in CNN_layers:
    borrowed_layer.trainable = True

In [25]:
# Print the architecture again after the CNN layers were made trainable again.
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 10)        260       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 20)        5020      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 20)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 30)          15030     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 30)          0         
_________________________________________________________________
flatten (Flatten)            (None, 270)              

In [26]:
# Recompile after flipping the trainable flags back to True so the change is
# picked up for the fine-tuning run.
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and is rejected by newer Keras releases.
model2.compile(loss='mse', optimizer=SGD(learning_rate=0.056), metrics=['accuracy'])

In [27]:
# Continue training on Fashion-MNIST, now with every layer trainable.
model2.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=256,
)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x162c2594d08>

In [28]:
# evaluate() returns [loss, accuracy] (the model is compiled with
# metrics=['accuracy']); report the accuracy after fine-tuning in percent.
score_train = model2.evaluate(x=x_train, y=y_train)
score_test = model2.evaluate(x=x_test, y=y_test)

print(f"model2訓練資料的正確率: {score_train[1]*100}")
print(f"model2測試資料的正確率: {score_test[1]*100}")

model2訓練資料的正確率: 78.18833589553833
model2測試資料的正確率: 76.77000164985657
