# 导入函数库

In [1]:
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras  # 在TF2.0版本中,已经是自带Keras了,所以不需要额外安装
import tensorflow_model_optimization as tfmot  # 导入TF2.0的模型优化函数库,降低模型优化难度,相当于调用借口解决问题
import zipfile
import tempfile
import tensorflow_datasets as tfds # 这个是之前说过的Tensorflow Datasets

# 如果出现显存不够的错误,把这个代码加上

可以展示下不加这个出现错误的情形

In [2]:
# 加入下面这个代码片段主要是因为TF2.0对RTX20系列显卡支持不是很好,容易爆显存,所以设置成用多少占多少显存,而不是一次性全占了(默认)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# 定义网络结构

## 一些参数设置

In [3]:
layers = tf.keras.layers
models = tf.keras.models
keras_utils = tf.keras.utils

In [4]:
WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
                       'releases/download/v0.1/'
                       'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')  # 此处的预训练模型不带全连接层

## VGG16模型改写用于Cifar10分类

In [5]:
# 关于这里把官方VGG16函数拿了出来,原因是这样的,我们知道Keras主要有Sequential model和Functional model两种构建模型方式,
# 而在进行迁移学习的时候，我们经常是加载预训练的模型(设置include_top=False),此时我们很常见的做法是add layer到模型上,
# 这样会形成模型嵌套,这个在使用global_pruning的时候会出错,所以我把VGG16函数拿了出来,手动改了下
def VGG16(include_top=False,
         weights='imagenet',
         input_shape=(32, 32, 3),
         pooling='avg',
         classes=10,
         **kwargs):
    img_input = layers.Input(shape=input_shape)  # 输入节点
    
    # Block 1
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv1')(img_input)
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv1')(x)
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv1')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv2')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv2')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv2')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
    
    if pooling == 'avg':
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(128, activation='relu', name='fc1')(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x) # 此处是10分类，而不是ImageNet的1000分类
    
    inputs = img_input  # inputs是输入节点, x是输出节点
    model = models.Model(inputs, x , name='vgg16')  # 生成一个Model, 需要指定输入和输出
    
    weights_path = keras_utils.get_file(
        'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',  # 此处加载的是不存在全连接层的预训练模型
        WEIGHTS_PATH_NO_TOP,
        cache_subdir='models',
        file_hash='6d6bbae143d832006294945121d1f1fc')
    model.load_weights(weights_path, by_name=True)  
    # 加载在ImageNet上预训练过的模型，注意by_name参数很有用，把layer和layer name对应上了
    
    return model

## 声明一个VGG16模型实例

In [6]:
model = VGG16()

## 这个函数可以用于查看网络结构和参数量

In [7]:
model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 32, 32, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 32, 32, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 16, 16, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 16, 16, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 16, 16, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 8, 8, 128)         0     

## 对特征提取层进行冻结,加快模型训练速度

可以展示下不加的情形，更直观的说明为什么需要训练更长时间

In [8]:
length = len(model.layers)  # 查看模型有多少层Layer

In [9]:
# 对模型的特征提取层进行冻结,可以加快模型训练速度
for i in range(length - 2): # 我们看下上面的model.summary()，可以发现最后2层是全连接，所以不冻结
    model.layers[i].trainable = False  # 对该层Layer进行冻结

In [10]:
for i in range(length):
    print(model.layers[i].trainable)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True
True


## 进行模型训练

In [11]:
# 此处使用tfds的方式构建data pipeline
(raw_test, raw_train), metadata = tfds.load(
    'cifar10',
    split=['test', 'train'],
    with_info=True,
    as_supervised=True
)

IMG_SIZE = 32 # All images will be resized to 32X32

def format_example(image, label):
    image = tf.cast(image, tf.float32)
    image = (image/127.5) - 1
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label

train = raw_train.map(format_example)
test = raw_test.map(format_example)

BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

# 可以体验下这里是否加prefetch(tf.data.experimental.AUTOTUNE)和cache()的区别，对训练速度，以及CPU负载有影响
train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE).cache().prefetch(tf.data.experimental.AUTOTUNE)
test_batches = test.batch(BATCH_SIZE)

# compile模型并训练
model.compile(optimizer='adam',
             loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
             metrics=['accuracy'])

model.fit(
    train_batches,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f6e9024fba8>

## Baseline的test acc和推断速度

In [12]:
# 此处我们可以看到Baseline的test acc和inference速度为6ms/step
_, baseline_model_accuracy = model.evaluate(test_batches, verbose=1)
print('Baseline test accuracy: ', baseline_model_accuracy)

keras_file = './baseline.h5'
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('Saved baseline model to: ', keras_file)

Baseline test accuracy:  0.6237000226974487
Saved baseline model to:  ./baseline.h5


# 对Baseline进行prune

In [13]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# 计算end_step的值
batch_size = BATCH_SIZE
epochs = 2

num_images = 50000
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# 设置pruning参数
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                            final_sparsity=0.80,
                                                            begin_step=0,
                                                            end_step=end_step)
}
# 对模型进行全局prune
model_for_pruning = prune_low_magnitude(model, **pruning_params)

# 重新compile剪枝后的模型
model_for_pruning.compile(optimizer='adam',
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                         metrics=['accuracy'])

model_for_pruning.summary()

Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Please use `layer.add_weight` method instead.


Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
prune_low_magnitude_block1_c (None, 32, 32, 64)        3522      
_________________________________________________________________
prune_low_magnitude_block1_c (None, 32, 32, 64)        73794     
_________________________________________________________________
prune_low_magnitude_block1_p (None, 16, 16, 64)        1         
_________________________________________________________________
prune_low_magnitude_block2_c (None, 16, 16, 128)       147586    
_________________________________________________________________
prune_low_magnitude_block2_c (None, 16, 16, 128)       295042    
_________________________________________________________________
prune_low_magnitude_block2_p (None, 8, 8, 128)         1     

## 训练Baseline_pruned

In [14]:
logdir = './logs'

callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir)
]

model_for_pruning.fit(train_batches,
                     batch_size=batch_size, epochs=epochs,
                     callbacks=callbacks)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f6f20950630>

## Baseline_pruned的test acc和推断速度

In [15]:
# 此处我们可以看到模型的Baseline_pruned的test acc和inferece速度为8ms/step
_, model_for_pruning_accuracy = model_for_pruning.evaluate(test_batches, verbose=1)
print('Pruned test accuracy: ', model_for_pruning_accuracy)

Pruned test accuracy:  0.5507000088691711


In [16]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)  # 导出模型前要去除pruning wrapper

pruned_keras_file = './baseline_pruned.h5'
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Saved pruned keras model to: ', pruned_keras_file)

Saved pruned keras model to:  ./baseline_pruned.h5


# 比较Baseline和Baseline_pruned模型压缩后的体积大小

In [17]:
def get_gzipped_model_size(file):
    # Returns size of gzipped model, in bytes
    _, zipped_file = tempfile.mkstemp('.zip')
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(file)
    return os.path.getsize(zipped_file)

In [18]:
# 此处是Baseline和Baseline_pruned模型经过压缩后的体积对比
print("Size of gzipped baseline Keras model: %.2f Mib" % (get_gzipped_model_size(keras_file) / 1000 / 1000))
print("Size of gzipped pruned Keras model: %.2f Mib" % (get_gzipped_model_size(pruned_keras_file) / 1000 / 1000))

Size of gzipped baseline Keras model: 54.98 Mib
Size of gzipped pruned Keras model: 17.31 Mib
