In [1]:
import argparse
import math
import mindspore

from mindspore import context
from mindspore.context import ParallelMode
from mindspore.train import Model
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.communication.management import init, get_rank, get_group_size
from mindspore.train.serialization import load_checkpoint, load_param_into_net

from mindface.detection.loss import MultiBoxLoss
from mindface.detection.datasets import create_dataset
from mindface.detection.utils import adjust_learning_rate

from mindface.detection.models import RetinaFace, RetinaFaceWithLossCell, resnet50, mobilenet025
from mindface.detection.runner import read_yaml, TrainingWrapper

In [2]:
#set seed
mindspore.common.seed.set_seed(42)

#set mode
context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')

In [3]:
# create dataset
config_cfg = 'configs/RetinaFace_mobilenet025.yaml'
cfg = read_yaml(config_cfg) #config_cfg为配置文件的地址
# change the dataset path of yours
data_dir = 'data/WiderFace/train/label.txt'
ds_train = create_dataset(data_dir, variance=[0.1,0.2], match_thresh=0.35, image_size=640, clip=False, batch_size=8,
                        repeat_num=1, shuffle=True, multiprocessing=True, num_worker=4, is_distribute=False)
print('dataset size is : \n', ds_train.get_dataset_size())

dataset size is : 
 1609


In [4]:
#set learning rate schedule
steps_per_epoch = math.ceil(ds_train.get_dataset_size())
lr = adjust_learning_rate(0.01, 0.1, (70,90), steps_per_epoch, 100,
                              warmup_epoch=5, lr_type1='dynamic_lr')

In [5]:
#build model
backbone_mobilenet025 = mobilenet025(1000)
retinaface_mobilenet025  = RetinaFace(phase='train', backbone=backbone_mobilenet025, in_channel=32, out_channel=64)
retinaface_mobilenet025.set_train(True)

RetinaFace<
  (base): MobileNetV1<
    (stage1): SequentialCell<
      (0): SequentialCell<
        (0): Conv2d<input_channels=3, output_channels=8, kernel_size=(3, 3), stride=(2, 2), pad_mode=pad, padding=1, dilation=(1, 1), group=1, has_bias=False, weight_init=normal, bias_init=zeros, format=NCHW>
        (1): BatchNorm2d<num_features=8, eps=1e-05, momentum=0.09999999999999998, gamma=Parameter (name=base.stage1.0.1.gamma, shape=(8,), dtype=Float32, requires_grad=True), beta=Parameter (name=base.stage1.0.1.beta, shape=(8,), dtype=Float32, requires_grad=True), moving_mean=Parameter (name=base.stage1.0.1.moving_mean, shape=(8,), dtype=Float32, requires_grad=False), moving_variance=Parameter (name=base.stage1.0.1.moving_variance, shape=(8,), dtype=Float32, requires_grad=False)>
        (2): LeakyReLU<>
        >
      (1): SequentialCell<
        (0): Conv2d<input_channels=8, output_channels=8, kernel_size=(3, 3), stride=(1, 1), pad_mode=pad, padding=1, dilation=(1, 1), group=8, has_bias

In [6]:
# load checkpoint
pretrain_model_path = 'pretrained/RetinaFace_MobileNet025.ckpt'
param_dict_retinaface = load_checkpoint(pretrain_model_path)
load_param_into_net(retinaface_mobilenet025, param_dict_retinaface)
print(f'Resume Model from [{pretrain_model_path}] Done.')



Resume Model from [pretrained/RetinaFace_MobileNet025.ckpt] Done.


In [7]:
# set loss
multibox_loss = MultiBoxLoss(num_classes = 2, num_boxes = 16800, neg_pre_positive=7)

In [8]:
# set optimazer
opt = mindspore.nn.SGD(params=retinaface_mobilenet025.trainable_params(), learning_rate=lr, momentum=0.9,
                               weight_decay=5e-4, loss_scale=1)

In [9]:
# add loss and optimazer  
net = RetinaFaceWithLossCell(retinaface_mobilenet025, multibox_loss, loc_weight=2.0, class_weight=1.0, landm_weight=1.0)
net = TrainingWrapper(net, opt)

In [None]:
finetune_epochs = 10
model = Model(net)
config_ck = CheckpointConfig(save_checkpoint_steps=1000,
                                 keep_checkpoint_max=3)
ckpoint_cb = ModelCheckpoint(prefix="RetinaFace", directory=cfg['ckpt_path'], config=config_ck)

time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
callback_list = [LossMonitor(), time_cb, ckpoint_cb]

print("============== Starting Training ==============")
model.train(finetune_epochs, ds_train, callbacks=callback_list, dataset_sink_mode=False)





epoch: 1 step: 1, loss is 3.9023001194000244
epoch: 1 step: 2, loss is 5.1197357177734375
epoch: 1 step: 3, loss is 2.216236114501953
epoch: 1 step: 4, loss is 4.17732048034668
epoch: 1 step: 5, loss is 2.1989660263061523
epoch: 1 step: 6, loss is 2.4191744327545166
epoch: 1 step: 7, loss is 4.706803798675537
epoch: 1 step: 8, loss is 3.6719870567321777
epoch: 1 step: 9, loss is 6.679204940795898
epoch: 1 step: 10, loss is 2.627380132675171
epoch: 1 step: 11, loss is 3.092747688293457
epoch: 1 step: 12, loss is 2.8383612632751465
epoch: 1 step: 13, loss is 2.9648098945617676
epoch: 1 step: 14, loss is 3.2772698402404785
epoch: 1 step: 15, loss is 5.015932083129883
epoch: 1 step: 16, loss is 5.855517387390137
epoch: 1 step: 17, loss is 4.99378776550293
epoch: 1 step: 18, loss is 5.658236503601074
epoch: 1 step: 19, loss is 3.9252257347106934
epoch: 1 step: 20, loss is 3.6157286167144775
epoch: 1 step: 21, loss is 6.473025321960449
epoch: 1 step: 22, loss is 8.208998680114746
epoch: 1 st

epoch: 1 step: 181, loss is 4.362998008728027
epoch: 1 step: 182, loss is 6.627020835876465
epoch: 1 step: 183, loss is 5.752681255340576
epoch: 1 step: 184, loss is 4.137649059295654
epoch: 1 step: 185, loss is 2.3150827884674072
epoch: 1 step: 186, loss is 7.574062824249268
epoch: 1 step: 187, loss is 6.697934627532959
epoch: 1 step: 188, loss is 6.244487762451172
epoch: 1 step: 189, loss is 4.931735038757324
epoch: 1 step: 190, loss is 4.733990669250488
epoch: 1 step: 191, loss is 3.9278128147125244
epoch: 1 step: 192, loss is 3.2420432567596436
epoch: 1 step: 193, loss is 5.682356834411621
epoch: 1 step: 194, loss is 5.704257011413574
epoch: 1 step: 195, loss is 5.793201446533203
epoch: 1 step: 196, loss is 5.410724639892578
epoch: 1 step: 197, loss is 4.2469587326049805
epoch: 1 step: 198, loss is 4.8359174728393555
epoch: 1 step: 199, loss is 3.400698661804199
epoch: 1 step: 200, loss is 5.177846908569336
epoch: 1 step: 201, loss is 5.254858016967773
epoch: 1 step: 202, loss is 5

看起来这个loss也没啥好训练的，直接开始下面验证一下效果吧

In [12]:
network = RetinaFace(phase='predict', backbone=backbone_mobilenet025, in_channel=32, out_channel=64)
backbone_mobilenet025.set_train(False)
net.set_train(False)

TrainingWrapper<
  (network): RetinaFaceWithLossCell<
    (network): RetinaFace<
      (base): MobileNetV1<
        (stage1): SequentialCell<
          (0): SequentialCell<
            (0): Conv2d<input_channels=3, output_channels=8, kernel_size=(3, 3), stride=(2, 2), pad_mode=pad, padding=1, dilation=(1, 1), group=1, has_bias=False, weight_init=normal, bias_init=zeros, format=NCHW>
            (1): BatchNorm2d<num_features=8, eps=1e-05, momentum=0.09999999999999998, gamma=Parameter (name=base.stage1.0.1.gamma, shape=(8,), dtype=Float32, requires_grad=True), beta=Parameter (name=base.stage1.0.1.beta, shape=(8,), dtype=Float32, requires_grad=True), moving_mean=Parameter (name=base.stage1.0.1.moving_mean, shape=(8,), dtype=Float32, requires_grad=False), moving_variance=Parameter (name=base.stage1.0.1.moving_variance, shape=(8,), dtype=Float32, requires_grad=False)>
            (2): LeakyReLU<>
            >
          (1): SequentialCell<
            (0): Conv2d<input_channels=8, output_c

In [19]:
cfg['val_model'] = 'pretrained/RetinaFace_MobileNet025.ckpt'
assert cfg['val_model'] is not None, 'val_model is None.'
param_dict = load_checkpoint(cfg['val_model'])
print('Load trained model done. {}'.format(cfg['val_model']))
network.init_parameters_data()
load_param_into_net(network, param_dict)



Load trained model done. pretrained/RetinaFace_MobileNet025.ckpt


[]

In [20]:
testset_folder = cfg['val_dataset_folder']
testset_label_path = cfg['val_dataset_folder'] + "label.txt"
with open(testset_label_path, 'r') as f:
    _test_dataset = f.readlines()
    test_dataset = []
    for im_path in _test_dataset:
        if im_path.startswith('# '):
            test_dataset.append(im_path[2:-1])

num_images = len(test_dataset)
print(num_images)

3226


In [33]:
from mindface.detection.runner import DetectionEngine, Timer
import argparse
import os
import numpy as np
import cv2

from mindspore import Tensor, context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore import ops

from utils import prior_box
from models import RetinaFace, resnet50, mobilenet025
from runner import DetectionEngine, Timer, read_yaml
detection = DetectionEngine(nms_thresh=0.4, conf_thresh=0.02, iou_thresh=0.5, var=[0.1,0.2])

In [34]:
# 初始化计时器，forward_time表示网络推理的时间，misc表示后处理的时间。
timers = {'forward_time': Timer(), 'misc': Timer()}

if cfg['val_origin_size']:
    h_max, w_max = 0, 0
    for img_name in test_dataset:
        image_path = os.path.join(testset_folder, 'images', img_name)
        _img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if _img.shape[0] > h_max:
            h_max = _img.shape[0]
        if _img.shape[1] > w_max:
            w_max = _img.shape[1]

    h_max = (int(h_max / 32) + 1) * 32
    w_max = (int(w_max / 32) + 1) * 32

    priors = prior_box(image_sizes=(h_max, w_max),
                        min_sizes=[[16, 32], [64, 128], [256, 512]],
                        steps=[8, 16, 32],
                        clip=False)
else:
    target_size = 1600
    max_size = 2160
    priors = prior_box(image_sizes=(max_size, max_size),
                        min_sizes=[[16, 32], [64, 128], [256, 512]],
                        steps=[8, 16, 32],
                        clip=False)

In [36]:
print('Predict box starting')
ave_time = 0
ave_forward_pass_time = 0
ave_misc = 0
for i, img_name in enumerate(test_dataset):
    image_path = os.path.join(testset_folder, 'images', img_name)

    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = np.float32(img_raw)

    # testing scale
    if cfg['val_origin_size']:
        resize = 1
        assert img.shape[0] <= h_max and img.shape[1] <= w_max
        image_t = np.empty((h_max, w_max, 3), dtype=img.dtype)
        image_t[:, :] = (104.0, 117.0, 123.0)
        image_t[0:img.shape[0], 0:img.shape[1]] = img
        img = image_t
    else:
        im_size_min = np.min(img.shape[0:2])
        im_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)

        img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)

        assert img.shape[0] <= max_size and img.shape[1] <= max_size
        image_t = np.empty((max_size, max_size, 3), dtype=img.dtype)
        image_t[:, :] = (104.0, 117.0, 123.0)
        image_t[0:img.shape[0], 0:img.shape[1]] = img
        img = image_t

    scale = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0]], dtype=img.dtype)
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = Tensor(img)

    timers['forward_time'].start()
    boxes, confs, _ = network(img)
    timers['forward_time'].end()
    timers['misc'].start()
    print(resize)
    detection.eval(boxes, confs, resize, scale, img_name, priors)
    timers['misc'].end()

    ave_time = ave_time + timers['forward_time'].diff + timers['misc'].diff
    ave_forward_pass_time = ave_forward_pass_time + timers['forward_time'].diff
    ave_misc = ave_misc + timers['misc'].diff
    print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s sum_time: {:.4f}s'.format(i + 1, num_images,
                                                                                    timers['forward_time'].diff,
                                                                                    timers['misc'].diff,
                                                                                    timers['forward_time'].diff + timers['misc'].diff))

Predict box starting
2.109375


TypeError: 'float' object is not iterable