### 蒸馏通道裁剪模型
本示例介绍使用更高精度的[YOLOv3-ResNet34](../../configs/yolov3_r34.yml)模型蒸馏经通道裁剪的[YOLOv3-MobileNet](../../configs/yolov3_mobilenet_v1.yml)模型。脚本可参照蒸馏脚本[distill.py](../distillation/distill.py)和通道裁剪脚本[prune.py](../prune/prune.py)简单修改得到，蒸馏过程采用细粒度损失来蒸馏YOLOv3输出层特征图。

切换到PaddleDetection根目录，设置环境变量

In [1]:
% cd ../..

/workplace/PaddleDetection


导入依赖包，注意须同时导入蒸馏和通道裁剪的相关接口

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import numpy as np
from collections import OrderedDict
from paddleslim.dist.single_distiller import merge, l2_loss
from paddleslim.prune import Pruner
from paddleslim.analysis import flops

from paddle import fluid
from ppdet.core.workspace import load_config, merge_config, create
from ppdet.data.reader import create_reader
from ppdet.utils.eval_utils import parse_fetches, eval_results, eval_run
from ppdet.utils.stats import TrainingStats
from ppdet.utils.cli import ArgsParser
from ppdet.utils.check import check_gpu
import ppdet.utils.checkpoint as checkpoint

定义细粒度的蒸馏损失函数

In [3]:
def split_distill(split_output_names, weight):
    """
    Build the fine-grained distillation loss between student and teacher.

    Every name in ``split_output_names`` is looked up in the global block of
    ``fluid.default_main_program()`` twice: once as given (student) and once
    with a ``teacher_`` prefix (teacher). The first 18 variables of each
    lookup are consumed as three consecutive groups of six, each group
    ordered ``x, y, w, h, obj, cls``.

    Args:
        split_output_names: names of the split output variables; the first
            18 are used (three groups of six).
        weight: factor the summed loss is multiplied by.

    Returns:
        ``(distill_reg_loss + distill_cls_loss + distill_obj_loss) * weight``
        where each term is the sum of the per-group losses.
    """
    sce = fluid.layers.sigmoid_cross_entropy_with_logits

    def fetch_heads(rename):
        # Resolve every name first, then cut the first 18 variables into
        # three (x, y, w, h, obj, cls) tuples.
        resolved = [
            fluid.default_main_program().global_block().var(rename(entry))
            for entry in split_output_names
        ]
        heads = []
        for begin in (0, 6, 12):
            vx, vy, vw, vh, vobj, vcls = resolved[begin:begin + 6]
            heads.append((vx, vy, vw, vh, vobj, vcls))
        return heads

    student_heads = fetch_heads(lambda entry: entry)
    teacher_heads = fetch_heads(lambda entry: 'teacher_' + entry)

    def obj_weighted_reg(sx, sy, sw, sh, tx, ty, tw, th, tobj):
        # x / y: cross entropy of student logits against sigmoid(teacher);
        # w / h: absolute difference. The sum is scaled by sigmoid(tobj).
        x_term = sce(sx, fluid.layers.sigmoid(tx))
        y_term = sce(sy, fluid.layers.sigmoid(ty))
        w_term = fluid.layers.abs(sw - tw)
        h_term = fluid.layers.abs(sh - th)
        box_term = fluid.layers.sum([x_term, y_term, w_term, h_term])
        teacher_objness = fluid.layers.sigmoid(tobj)
        return fluid.layers.reduce_mean(box_term * teacher_objness)

    def obj_weighted_cls(scls, tcls, tobj):
        # Cross entropy against sigmoid(teacher cls), scaled along axis 0 by
        # sigmoid(tobj).
        soft_target = fluid.layers.sigmoid(tcls)
        cls_term = sce(scls, soft_target)
        scaled = fluid.layers.elementwise_mul(
            cls_term, fluid.layers.sigmoid(tobj), axis=0)
        return fluid.layers.reduce_mean(scaled)

    def obj_loss(sobj, tobj):
        # Teacher objectness is binarised (logit > 0) and used as a target
        # that receives no gradient.
        hard_target = fluid.layers.cast(tobj > 0., dtype="float32")
        hard_target.stop_gradient = True
        return fluid.layers.reduce_mean(sce(sobj, hard_target))

    head_pairs = list(zip(student_heads, teacher_heads))

    # Tuple layout of each head: 0=x, 1=y, 2=w, 3=h, 4=obj, 5=cls.
    distill_reg_loss = fluid.layers.sum([
        obj_weighted_reg(s[0], s[1], s[2], s[3], t[0], t[1], t[2], t[3], t[4])
        for s, t in head_pairs
    ])
    distill_cls_loss = fluid.layers.sum(
        [obj_weighted_cls(s[5], t[5], t[4]) for s, t in head_pairs])
    distill_obj_loss = fluid.layers.sum(
        [obj_loss(s[4], t[4]) for s, t in head_pairs])

    loss = (distill_reg_loss + distill_cls_loss + distill_obj_loss) * weight
    return loss

读取配置文件，设置use_fine_grained_loss=True

In [4]:
# Load the YOLOv3-MobileNetV1 config (the model being distilled), then merge
# in the option that turns on the fine-grained loss.
cfg = load_config("./configs/yolov3_mobilenet_v1.yml")
merge_config({'use_fine_grained_loss': True})

{'AnchorGenerator': {},
 'BBoxAssigner': {},
 'BBoxHead': {},
 'BFP': {},
 'BalancedL1Loss': {},
 'BlazeFace': {},
 'BlazeNet': {},
 'BoxCoder': {},
 'CBResNet': {},
 'COCODataSet': {},
 'CascadeBBoxAssigner': {},
 'CascadeBBoxHead': {},
 'CascadeMaskRCNN': {},
 'CascadeRCNN': {},
 'CascadeRCNNClsAware': {},
 'CascadeTwoFCHead': {},
 'CascadeXConvNormHead': {},
 'DarkNet': {},
 'DiouLoss': {},
 'EvalReader': {'batch_size': 8,
  'bufsize': 32,
  'dataset': <ppdet.data.source.coco.COCODataSet at 0x7f61d55a5910>,
  'drop_empty': False,
  'inputs_def': {'fields': ['image', 'im_size', 'im_id'], 'num_max_boxes': 50},
  'sample_transforms': [<ppdet.data.transform.operators.DecodeImage at 0x7f61d55a5d10>,
   <ppdet.data.transform.operators.ResizeImage at 0x7f61d55a5d50>,
   <ppdet.data.transform.operators.NormalizeImage at 0x7f61d55a5d90>,
   <ppdet.data.transform.operators.PadBox at 0x7f61d55a5e10>,
   <ppdet.data.transform.operators.Permute at 0x7f61d55a5e50>],
  'worker_num': 8},
 'FPN': {}

创建执行器

In [5]:
# Number of CUDA devices; reused below to size the train reader and to decide
# whether sync batch norm is enabled.
devices_num = fluid.core.get_cuda_device_count()
# The executor itself is bound to GPU 0.
place = fluid.CUDAPlace(0)
# CPU alternative: use the two lines below instead of the two above.
# devices_num = int(os.environ.get('CPU_NUM', 1))
# place = fluid.CPUPlace()
exe = fluid.Executor(place)

构造训练模型和reader

In [6]:
main_arch = cfg.architecture
# build program
# No program_guard is used here; the later cells treat
# fluid.default_main_program() as the student training program.
model = create(main_arch)
inputs_def = cfg['TrainReader']['inputs_def']
train_feed_vars, train_loader = model.build_inputs(**inputs_def)
train_fetches = model.train(train_feed_vars)
# Student detection loss; the distillation loss is added to it later.
loss = train_fetches['loss']

start_iter = 0
# The reader is created for the remaining iterations multiplied by the
# number of devices.
train_reader = create_reader(cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num, cfg)
train_loader.set_sample_list_generator(train_reader, place)

loading annotations into memory...
Done (t=28.98s)
creating index...
index created!


2020-02-05 12:02:16,376-INFO: 118287 samples in file dataset/coco/annotations/instances_train2017.json
2020-02-05 12:02:23,315-INFO: places would be ommited when DataLoader is not iterable


<paddle.fluid.reader.GeneratorLoader at 0x7f61d55ede90>

构造评估模型和reader

In [7]:
# Build the evaluation network in its own Program; it shares the default
# startup program with the training network.
eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, fluid.default_startup_program()):
    with fluid.unique_name.guard():
        model = create(main_arch)
        inputs_def = cfg['EvalReader']['inputs_def']
        test_feed_vars, eval_loader = model.build_inputs(**inputs_def)
        # `fetches` is parsed into eval keys/values in the training cell.
        fetches = model.eval(test_feed_vars)
# Positional True is presumably Program.clone's for_test flag (the teacher
# cell below passes for_test=True by keyword) — confirm.
eval_prog = eval_prog.clone(True)

eval_reader = create_reader(cfg.EvalReader)
eval_loader.set_sample_list_generator(eval_reader, place)

loading annotations into memory...
Done (t=0.89s)
creating index...
index created!


2020-02-05 12:04:41,156-INFO: 5000 samples in file dataset/coco/annotations/instances_val2017.json
2020-02-05 12:04:41,260-INFO: places would be ommited when DataLoader is not iterable


<paddle.fluid.reader.GeneratorLoader at 0x7f6212664d10>

构造teacher模型并导入权重

In [8]:
# Load the teacher (YOLOv3-ResNet34) config and enable the fine-grained loss
# for it as well.
teacher_cfg = load_config("./configs/yolov3_r34.yml")
merge_config({'use_fine_grained_loss': True})
teacher_arch = teacher_cfg.architecture
# The teacher gets its own main and startup programs.
teacher_program = fluid.Program()
teacher_startup_program = fluid.Program()

with fluid.program_guard(teacher_program, teacher_startup_program):
    with fluid.unique_name.guard():
        # Clone the student's feed variables into the teacher program so
        # both networks are built on identically named inputs.
        teacher_feed_vars = OrderedDict()
        for name, var in train_feed_vars.items():
            teacher_feed_vars[name] = teacher_program.global_block(
            )._clone_variable(
                var, force_persistable=False)
        # NOTE: `model` and `train_fetches` are rebound to the teacher here.
        model = create(teacher_arch)
        train_fetches = model.train(teacher_feed_vars)
        teacher_loss = train_fetches['loss']

# Initialise the teacher parameters, then overwrite them with the
# pretrained weights.
exe.run(teacher_startup_program)
checkpoint.load_params(exe, teacher_program, "https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar")
# Keep only the inference-mode version of the teacher program.
teacher_program = teacher_program.clone(for_test=True)

2020-02-05 12:04:48,698-INFO: Found /root/.cache/paddle/weights/yolov3_r34
2020-02-05 12:04:48,700-INFO: Loading parameters from /root/.cache/paddle/weights/yolov3_r34...


合并program

In [9]:
# Map of teacher input names to student input names; every input keeps the
# same name on both sides.
data_name_map = {
    'target0': 'target0',
    'target1': 'target1',
    'target2': 'target2',
    'image': 'image',
    'gt_bbox': 'gt_bbox',
    'gt_class': 'gt_class',
    'gt_score': 'gt_score'
}
# Merge the teacher program into the student (default main) program. The
# other cells address the merged teacher variables with a 'teacher_' prefix.
merge(teacher_program, fluid.default_main_program(), data_name_map, place)

构造蒸馏损失和优化器

In [10]:
# Names of the split YOLO output variables, six per output head.
# split_distill unpacks each group as x, y, w, h, obj, cls.
# NOTE(review): these look like auto-generated variable names, so they
# presumably depend on the order in which the networks above were built —
# confirm before reordering any earlier cell.
yolo_output_names = [
    'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0',
    'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0',
    'strided_slice_4.tmp_0', 'transpose_0.tmp_0', 'strided_slice_5.tmp_0',
    'strided_slice_6.tmp_0', 'strided_slice_7.tmp_0',
    'strided_slice_8.tmp_0', 'strided_slice_9.tmp_0', 'transpose_2.tmp_0',
    'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0',
    'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0',
    'strided_slice_14.tmp_0', 'transpose_4.tmp_0'
]

# Distillation loss, scaled by 1000, is added to the student detection loss.
distill_loss = split_distill(yolo_output_names, 1000)
loss = distill_loss + loss
# Learning rate and optimizer are created from the loaded config.
lr_builder = create('LearningRate')
optim_builder = create('OptimizerBuilder')
lr = lr_builder()
opt = optim_builder(lr)
opt.minimize(loss)

([inputs {
    parameter: "Grad"
    arguments: "conv1_bn_offset@GRAD"
  }
  inputs {
    parameter: "LearningRate"
    arguments: "learning_rate_warmup"
  }
  inputs {
    parameter: "Param"
    arguments: "conv1_bn_offset"
  }
  inputs {
    parameter: "Velocity"
    arguments: "conv1_bn_offset_velocity_0"
  }
  outputs {
    parameter: "ParamOut"
    arguments: "conv1_bn_offset"
  }
  outputs {
    parameter: "VelocityOut"
    arguments: "conv1_bn_offset_velocity_0"
  }
  type: "momentum"
  attrs {
    name: "op_callstack"
    type: STRINGS
    strings: "  File \"/usr/local/python2.7.15/lib/python2.7/site-packages/paddle/fluid/framework.py\", line 2503, in append_op\n    attrs=kwargs.get(\"attrs\", None))\n"
    strings: "  File \"/usr/local/python2.7.15/lib/python2.7/site-packages/paddle/fluid/optimizer.py\", line 883, in _append_optimize_op\n    stop_gradient=True)\n"
    strings: "  File \"/usr/local/python2.7.15/lib/python2.7/site-packages/paddle/fluid/optimizer.py\", line 440, 

裁剪训练和评估program

In [11]:
# Run the default startup program before pruning.
exe.run(fluid.default_startup_program())

# Convolution weights to prune: six per yolo_block, three blocks.
pruned_params = ["yolo_block.0.0.0.conv.weights",
                "yolo_block.0.0.1.conv.weights",
                "yolo_block.0.1.0.conv.weights",
                "yolo_block.0.1.1.conv.weights",
                "yolo_block.0.2.conv.weights",
                "yolo_block.0.tip.conv.weights",
                "yolo_block.1.0.0.conv.weights",
                "yolo_block.1.0.1.conv.weights",
                "yolo_block.1.1.0.conv.weights",
                "yolo_block.1.1.1.conv.weights",
                "yolo_block.1.2.conv.weights",
                "yolo_block.1.tip.conv.weights",
                "yolo_block.2.0.0.conv.weights",
                "yolo_block.2.0.1.conv.weights",
                "yolo_block.2.1.0.conv.weights",
                "yolo_block.2.1.1.conv.weights",
                "yolo_block.2.2.conv.weights",
                "yolo_block.2.tip.conv.weights"]
# One ratio per parameter above: 0.5 for block 0, 0.7 for block 1 and 0.8
# for block 2.
pruned_ratios = [0.5] * 6 + [0.7] * 6 + [0.8] * 6

print("pruned params: {}".format(pruned_params))
print("pruned ratios: {}".format(pruned_ratios))

pruner = Pruner()
# Prune the merged training program with only_graph=False; the first element
# of the returned value is used as the pruned program.
# NOTE(review): only_graph presumably controls whether the parameter tensors
# in the scope are pruned too — confirm against the PaddleSlim Pruner docs.
distill_prog = pruner.prune(
    fluid.default_main_program(),
    fluid.global_scope(),
    params=pruned_params,
    ratios=pruned_ratios,
    place=place,
    only_graph=False)[0]

# FLOPs of the eval program are measured before and after pruning it with
# only_graph=True.
base_flops = flops(eval_prog)
eval_prog = pruner.prune(
    eval_prog,
    fluid.global_scope(),
    params=pruned_params,
    ratios=pruned_ratios,
    place=place,
    only_graph=True)[0]
pruned_flops = flops(eval_prog)
# The first value printed is the fraction of FLOPs removed.
print("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format(float(base_flops - pruned_flops)/base_flops, base_flops, pruned_flops))

pruned params: ['yolo_block.0.0.0.conv.weights', 'yolo_block.0.0.1.conv.weights', 'yolo_block.0.1.0.conv.weights', 'yolo_block.0.1.1.conv.weights', 'yolo_block.0.2.conv.weights', 'yolo_block.0.tip.conv.weights', 'yolo_block.1.0.0.conv.weights', 'yolo_block.1.0.1.conv.weights', 'yolo_block.1.1.0.conv.weights', 'yolo_block.1.1.1.conv.weights', 'yolo_block.1.2.conv.weights', 'yolo_block.1.tip.conv.weights', 'yolo_block.2.0.0.conv.weights', 'yolo_block.2.0.1.conv.weights', 'yolo_block.2.1.0.conv.weights', 'yolo_block.2.1.1.conv.weights', 'yolo_block.2.2.conv.weights', 'yolo_block.2.tip.conv.weights']
pruned ratios: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8]
FLOPs -0.675602593026; total FLOPs: 24531648.0; pruned FLOPs: 7958003.0


导入裁剪模型权重

In [14]:
# Load the weights of the already pruned student model into the pruned
# training program.
checkpoint.load_params(exe, distill_prog, "https://paddlemodels.bj.bcebos.com/PaddleSlim/prune/yolov3_mobilenet_v1_prune578.tar")

2020-02-05 12:15:10,958-INFO: Decompressing /root/.cache/paddle/weights/yolov3_mobilenet_v1_prune578.tar...
2020-02-05 12:15:11,078-INFO: Loading parameters from /root/.cache/paddle/weights/yolov3_mobilenet_v1_prune578...


编译训练和评估program

In [16]:
# Build options for the data-parallel training program.
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
build_strategy.fuse_all_optimizer_ops = False
build_strategy.fuse_elewise_add_act_ops = True
# only enable sync_bn in multi GPU devices
# NOTE(review): `model` was last rebound to the teacher network when the
# teacher program was built, so this reads the teacher backbone's norm_type —
# confirm that is intended rather than the student's.
sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
    and cfg.use_gpu

exec_strategy = fluid.ExecutionStrategy()
# iteration number when CompiledProgram tries to drop local execution scopes.
# Set it to be 1 to save memory usages, so that unused variables in
# local execution scopes can be deleted after each iteration.
exec_strategy.num_iteration_per_drop_scope = 1

# Training runs the pruned, merged program in data-parallel mode, optimising
# the combined (detection + distillation) loss.
parallel_main = fluid.CompiledProgram(distill_prog).with_data_parallel(
    loss_name=loss.name,
    build_strategy=build_strategy,
    exec_strategy=exec_strategy)
# Evaluation uses the pruned eval program without data parallelism.
compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)

开始训练

In [None]:
# parse eval fetches: the extra fields needed depend on the metric
extra_keys = []
if cfg.metric == 'COCO':
    extra_keys = ['im_info', 'im_id', 'im_shape']
elif cfg.metric == 'VOC':
    extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                 extra_keys)

# whether output bbox is normalized in model output layer
is_bbox_normalized = False
map_type = cfg.map_type if 'map_type' in cfg else '11point'
best_box_ap_list = [0.0, 0]  # [map, iter]
save_dir = os.path.join(cfg.save_dir, 'yolov3_mobilenet_v1')
# This notebook never defines a command-line FLAGS object, so the evaluation
# output directory is set explicitly instead of reading FLAGS.output_eval
# (which raised NameError on the first evaluation).
output_eval = None
last_step = cfg.max_iters - 1

# Variables fetched on every step; the teacher loss lives in the merged
# program under the 'teacher_' prefix.
train_fetch_list = [
    'teacher_' + teacher_loss.name, distill_loss.name, loss.name, lr.name
]

train_loader.start()
for step_id in range(start_iter, cfg.max_iters):
    teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run(
        parallel_main, fetch_list=train_fetch_list)
    if step_id % 20 == 0:
        print(
            "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}".
            format(step_id, lr_np[0], loss_np[0], distill_loss_np[0],
                   teacher_loss_np[0]))

    # Snapshot + evaluate every cfg.snapshot_iter steps (except step 0) and
    # on the final step.
    at_snapshot = step_id % cfg.snapshot_iter == 0 and step_id != 0
    if at_snapshot or step_id == last_step:
        save_name = str(step_id) if step_id != last_step else "model_final"
        checkpoint.save(exe, distill_prog, os.path.join(save_dir, save_name))
        # eval
        results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                           eval_values, eval_cls)
        resolution = None
        box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes,
                                    resolution, is_bbox_normalized,
                                    output_eval, map_type,
                                    cfg['EvalReader']['dataset'])

        # Track the best box AP and keep a copy of that model.
        if box_ap_stats[0] > best_box_ap_list[0]:
            best_box_ap_list[0] = box_ap_stats[0]
            best_box_ap_list[1] = step_id
            checkpoint.save(exe, distill_prog,
                            os.path.join("./", "best_model"))
        print("Best test box ap: {}, in step: {}".format(
            best_box_ap_list[0], best_box_ap_list[1]))

# Reset the loader once, after training has finished. Doing this inside the
# loop body reset the loader after every step without restarting it.
train_loader.reset()

step 0 lr 0.000001, loss 59.226379, distill_loss 17.233088, teacher_loss 42.062668


我们也提供了一键式启动蒸馏通道裁剪模型训练脚本[distill_pruned_model.py](./distill_pruned_model.py)，可通过如下命令启动训练。
```bash
python slim/extensions/distill_pruned_model.py -c configs/yolov3_mobilenet_v1.yml -t configs/yolov3_r34.yml --teacher_pretrained https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar --pruned_params="yolo_block.0.0.0.conv.weights,yolo_block.0.0.1.conv.weights,yolo_block.0.1.0.conv.weights,yolo_block.0.1.1.conv.weights,yolo_block.0.2.conv.weights,yolo_block.0.tip.conv.weights,yolo_block.1.0.0.conv.weights,yolo_block.1.0.1.conv.weights,yolo_block.1.1.0.conv.weights,yolo_block.1.1.1.conv.weights,yolo_block.1.2.conv.weights,yolo_block.1.tip.conv.weights,yolo_block.2.0.0.conv.weights,yolo_block.2.0.1.conv.weights,yolo_block.2.1.0.conv.weights,yolo_block.2.1.1.conv.weights,yolo_block.2.2.conv.weights,yolo_block.2.tip.conv.weights" --pruned_ratios="0.5,0.5,0.5,0.5,0.5,0.5,0.7,0.7,0.7,0.7,0.7,0.7,0.8,0.8,0.8,0.8,0.8,0.8" -o use_fine_grained_loss=true pretrain_weights=https://paddlemodels.bj.bcebos.com/PaddleSlim/prune/yolov3_mobilenet_v1_prune578.tar
```