# 內容
使用 keras 內建的預訓練 model 做 Transfer Learning（使用 CIFAR-10 dataset）。下方程式保留了 Xception 的寫法（已註解），實際執行的是 ResNet50。

# 目的
- 了解如何使用 Transfer Learning
- 了解 Transfer Learning 的優點，並觀察模型收斂速度

In [ ]:
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
import keras
from keras.layers import Input
 
from keras.datasets import cifar10
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder


# Symbolic input for 32x32 images with 3 channels.
input_tensor = Input(shape=(32, 32, 3))

# Xception architecture (alternative backbone, currently disabled)
# See https://keras.io/zh/applications/
# include_top decides whether the Fully Connected Layer is added; its default number of outputs is 1000
# model=keras.applications.xception.Xception(include_top=False, weights='imagenet', input_tensor=input_tensor, pooling=None, classes=10)

# ResNet50 architecture
# See https://keras.io/zh/applications/
# include_top decides whether the Fully Connected Layer is added
model = keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
    pooling=None,
    classes=10,
)

# Print the layer-by-layer overview of the loaded backbone.
model.summary()


# 添加層數

In [ ]:
# Continue from the output tensor of the model built so far.
x = model.output

# Extra layers can be appended after the original architecture.
# If the Fully Connected layers were not included when the model was created,
# a Fully Connected layer has to be added here to perform the classification.
x = GlobalAveragePooling2D()(x)
# Keras 2 names these arguments `units` and `rate`; the Keras 1 spellings
# `output_dim` and `p` are rejected by current Keras releases.
x = Dense(units=128, activation='relu')(x)
x = Dropout(rate=0.1)(x)

# CIFAR-10 has only 10 classes, so the final Fully Connected layer has 10 neurons.
predictions = Dense(units=10, activation='softmax')(x)

# Rebind `model` to the extended network (same input, new softmax output).
model = Model(inputs=model.input, outputs=predictions)
print('Model深度：{}'.format(len(model.layers)))


In [ ]:
# Freeze the first 100 layers so their weights are not updated;
# every layer from index 100 onward stays trainable.
for position, layer in enumerate(model.layers):
    layer.trainable = position >= 100

準備 CIFAR-10 dataset

In [ ]:
# Load the CIFAR-10 train/test splits and unpack images and labels.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

print(x_train.shape)  # expected: (50000, 32, 32, 3)

# Normalize Data
def normalize(X_train, X_test):
    """Standardize both arrays using statistics of the training array.

    A single mean and a single standard deviation are computed over axes
    0-3 of ``X_train`` and applied to ``X_train`` and ``X_test`` alike, so
    the test data is scaled with the training statistics.

    Args:
        X_train: 4-D array the statistics are computed from.
        X_test: array scaled with the same statistics.

    Returns:
        Tuple ``(X_train, X_test)`` of the standardized arrays.
    """
    all_axes = (0, 1, 2, 3)
    center = np.mean(X_train, axis=all_axes)
    # The small epsilon keeps the division defined when the std is zero.
    spread = np.std(X_train, axis=all_axes) + 1e-7
    return (X_train - center) / spread, (X_test - center) / spread
    
    
# Standardize the training and test images with the training statistics.
x_train, x_test = normalize(x_train, x_test)

# One-hot encoding
# Ex: label = 2 becomes [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# The encoder is fitted on the training labels only and then reused for
# the test labels so both splits share the same category mapping.
one_hot = OneHotEncoder()
one_hot.fit(y_train)
y_train = one_hot.transform(y_train).toarray()
y_test = one_hot.transform(y_test).toarray()

## Training

In [7]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (labels are one-hot encoded), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training split for 100 epochs with mini-batches of 32.
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
  192/50000 [..............................] - ETA: 1:41:39 - loss: 2.9591 - accuracy: 0.1615

KeyboardInterrupt: 