## 6.4 使用迁移学习的思想，以VGG16作为模板搭建模型，训练手写字体识别

In [4]:
# 引入keras里的VGG16模块
from keras.applications.vgg16 import VGG16

# 加载keras模型
from keras.layers import Input, Flatten, Dense, Dropout
from keras.models import Model
from keras.optimizers import SGD

# 加载字体库
from keras.datasets import mnist

# 加载Opencv
import cv2
import h5py as h5py
import numpy as np

In [5]:
# Build VGG16 without its fully-connected top (include_top=False), loading the
# ImageNet weights, then attach a fresh 10-way softmax classifier.
# No layer is frozen here, so every weight of the network stays trainable.
model_vgg = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
model = Flatten(name='flatten')(model_vgg.output)
model = Dense(units=10, activation='softmax')(model)
model_vgg_mnist = Model(inputs=model_vgg.input, outputs=model, name='vgg16')

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
# Print the layer-by-layer summary (output shapes and parameter counts)
# of the model in which no layer has been frozen.
model_vgg_mnist.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

我们有1496万个权重需要训练，因为这个模型的所有层都是可训练的（注意：上面的代码使用了 weights='imagenet'，实际上加载了预训练权重作为初始值，但没有把它们冻结，所以全部权重仍然需要训练）。
<br>迁移权重的好处是不需要重新训练网络权重，只需要训练最上层搭建的部分就行了；坏处是新数据不一定适用于已训练好的权重，数据分布和我们关心的问题可能完全不一样。

建立另外一个模型，把VGG16网络的结构和权重同时迁移
<br>关键点在于把不需要重新训练的权重“冷冻”起来。

In [9]:
# Side length (in pixels) of the square input images fed to the network.
ishape = 224

# VGG16 convolutional base with ImageNet weights and no fully-connected top.
model_vgg = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(ishape, ishape, 3),
)

# Freeze every layer of the base so that only the new classifier head
# added below is updated during training.
for base_layer in model_vgg.layers:
    base_layer.trainable = False

# New head: flatten the convolutional features and classify into 10 classes.
model = Flatten()(model_vgg.output)
model = Dense(units=10, activation='softmax')(model)
model_vgg_mnist_pretrain = Model(
    inputs=model_vgg.input,
    outputs=model,
    name='vgg16_pretrain',
)

In [10]:
# Print the layer-by-layer summary of the model whose VGG16 base is frozen.
model_vgg_mnist_pretrain.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

只需训练约25万个参数，大约只有之前的1/60。

In [11]:
# Stochastic gradient descent with learning rate 0.05 and decay 1e-5.
sgd = SGD(lr=0.05, decay=1e-5)

# Configure training: categorical cross-entropy loss (targets are one-hot
# vectors) and accuracy reported as the metric.
model_vgg_mnist_pretrain.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

因为VGG16网络对于输入层的要求，用opencv把图像从28x28变成224x224(cv2.resize命令)，把黑白图像转换成三通道图像(cv2.COLOR_GRAY2BGR)，并且把训练数据转换成张量形式，供keras输入。

In [None]:
def _to_vgg_input(images, size):
    """Convert a sequence of grayscale images into one float32 batch.

    Each image is resized to ``size`` x ``size`` with ``cv2.resize`` and
    expanded to three channels with ``cv2.COLOR_GRAY2BGR``; the results are
    stacked along a new leading axis and cast to ``float32``.

    Parameters
    ----------
    images : iterable of 2-D arrays
        Single-channel images accepted by ``cv2.resize``.
    size : int
        Target height and width in pixels.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(images), size, size, 3)`` and dtype float32.
    """
    resized = [
        cv2.cvtColor(cv2.resize(img, (size, size)), cv2.COLOR_GRAY2BGR)
        for img in images
    ]
    # np.stack adds the batch axis directly; no per-image np.newaxis needed.
    return np.stack(resized).astype('float32')


# Load MNIST, then bring both splits into the input format of the network.
# NOTE(review): 60000 images of 224x224x3 in float32 is roughly 36 GB of RAM;
# confirm the machine can hold this or subsample before running.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = _to_vgg_input(X_train, ishape)
X_test = _to_vgg_input(X_test, ishape)

训练数据的维度如下，6万个样本，每个是224x224x3的张量

In [None]:
# Display the shape of the training array (last expression of the cell).
X_train.shape

In [None]:
# Display the shape of the test array (last expression of the cell).
X_test.shape

In [None]:
# Divide both image arrays by 255.
# NOTE: this cell rebinds X_train / X_test, so running it twice divides twice.
X_train = np.divide(X_train, 255)
X_test = np.divide(X_test, 255)

看一看训练数据是否有数据丢失，查找非零项

In [None]:
# Indices of the non-zero entries of the first training image; an empty
# result would mean the image is entirely zero.
np.nonzero(X_train[0] != 0)

把训练数据集和测试数据集的类别属性（0-9）转换成One Hot编码，作为输出层的维度。

In [None]:
def tran_y(y, num_classes=10):
    """Return the one-hot encoding of a single class index.

    Parameters
    ----------
    y : int
        Class index; used directly as an index into the output vector.
    num_classes : int, optional
        Length of the one-hot vector. Defaults to 10.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``num_classes`` that is 1 at position
        ``y`` and 0 elsewhere.

    Raises
    ------
    IndexError
        If ``y`` is out of bounds for a vector of length ``num_classes``.
    """
    y_ohe = np.zeros(num_classes)
    y_ohe[y] = 1
    return y_ohe

In [None]:
# One-hot encode the labels of each split with tran_y.
# The test targets must come from y_test; building them from y_train would
# pair the test images with unrelated labels.
y_train_ohe = np.array([tran_y(label) for label in y_train])
y_test_ohe = np.array([tran_y(label) for label in y_test])

再对MNIST数据集进行训练

In [None]:
# Train the classifier head on the training split and report loss/accuracy
# on the test split after every epoch.
model_vgg_mnist_pretrain.fit(
    X_train,
    y_train_ohe,
    validation_data=(X_test, y_test_ohe),
    epochs=200,
    batch_size=128,
)