# 1）将coco2017数据集进行切分，生成minicoco2017数据集

In [11]:
import json
from pycocotools.coco import COCO
import wget
import numpy as np
from random import sample
from pathlib import Path
from joblib import delayed, Parallel

# Metadata header (the "info" section) used as the base of the generated
# minicoco2017 annotation JSON.
ANNOTATIONS = {"info": {
    "description": "minicoco2017"
}
}

def myImages(images: list, train: int, val: int) -> tuple:
    """Split ``images`` into consecutive training and validation slices.

    Args:
        images: Sequence of image records (any sliceable sequence).
        train: Number of leading items that go to the training slice.
        val: Number of items, directly after the training slice, that go
            to the validation slice.

    Returns:
        ``(train_images, val_images)``. Either slice is shorter than
        requested when ``images`` does not hold enough items.
    """
    val_end = train + val
    return images[:train], images[train:val_end]


def cocoJson(images: list) -> dict:
    """Build a COCO-style annotation dict restricted to ``images``.

    Relies on the module-level ``coco`` (loaded COCO object), ``catIds``
    (original ids of the selected categories) and ``categories``
    (category name -> new contiguous id).

    Args:
        images: Image records (dicts with at least an ``"id"`` key).

    Returns:
        A new dict holding the ``ANNOTATIONS`` header plus ``"images"``,
        ``"annotations"`` and ``"categories"``. A fresh dict is returned on
        every call, so the result for one split is not overwritten when the
        function is called again for another split.
    """
    arrayIds = np.array([k["id"] for k in images])
    annIds = coco.getAnnIds(imgIds=arrayIds, catIds=catIds, iscrowd=None)
    # Remap the original category ids to 1..len(catIds) on *copies* of the
    # annotations: loadAnns is expected to hand back the COCO object's own
    # dicts, and rewriting those in place would leave ``coco`` holding ids
    # that no longer match ``catIds`` on a later call.
    anns = [
        dict(ann, category_id=catIds.index(ann["category_id"]) + 1)
        for ann in coco.loadAnns(annIds)
    ]
    catS = [{'id': int(value), 'name': key}
            for key, value in categories.items()]

    # Do not mutate the shared ANNOTATIONS header; build a new dict instead.
    return {
        **ANNOTATIONS,
        "images": images,
        "annotations": anns,
        "categories": catS,
    }


def createJson(JsonFile: dict, train: bool) -> None:
    """Write an annotation dict to ``minicoco2017/annotations``.

    Args:
        JsonFile: JSON-serialisable annotation data (the parameter was
            previously annotated with the ``json`` module, which is not a
            type).
        train: ``True`` writes ``train2017.json``; ``False`` writes
            ``val2017.json``.

    The output directory is created when it does not exist yet.
    """
    name = "train" if train else "val"
    out_dir = Path("minicoco2017/annotations")
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / f"{name}2017.json", "w") as outfile:
        json.dump(JsonFile, outfile)


def downloadImagesToTrain(img: dict) -> None:
    """Download one image into ``minicoco2017/train2017``.

    Args:
        img: Image record providing ``coco_url`` (source URL) and
            ``file_name`` (name of the file to write).
    """
    url = img['coco_url']
    target_dir = 'minicoco2017/train2017'
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    wget.download(url, target_dir + '/' + img['file_name'])

def downloadImagesToVal(img: dict) -> None:
    """Download one image into ``minicoco2017/val2017``.

    Args:
        img: Image record providing ``coco_url`` (source URL) and
            ``file_name`` (name of the file to write).
    """
    url = img['coco_url']
    target_dir = 'minicoco2017/val2017'
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    wget.download(url, target_dir + '/' + img['file_name'])

# Load the full COCO 2017 training annotations.
coco = COCO('./coco2017/annotations/instances_train2017.json')

# Names of the categories kept in the mini dataset.
catNms = ['car', 'airplane', 'person']

# Original COCO category ids for the selected names.
catIds = coco.getCatIds(catNms) 

# name -> original COCO id, then ordered by that original id.
dictCOCO = {k: coco.getCatIds(k)[0] for k in catNms}  
dictCOCOSorted = dict(sorted(dictCOCO.items(), key=lambda x: x[1]))  

# New contiguous ids 1..N, assigned to the names in original-id order.
IdCategories = list(range(1, len(catNms)+1)) 
categories = dict(zip(list(dictCOCOSorted), IdCategories)) 

# Get the corresponding image ids and images using loadImgs
imgIds = coco.getImgIds(catIds=catIds) 
imgOriginals = coco.loadImgs(imgIds) 

# The images are selected randomly
# (sampling the full length yields a shuffled copy of the list).
imgShuffled = sample(imgOriginals, len(imgOriginals))  

# Choose the number of images for the training and validation set. default 30-10
myImagesTrain, myImagesVal = myImages(imgShuffled, 30, 10)  

# Build and write the annotation file of each split.
trainSet = cocoJson(myImagesTrain)
createJson(trainSet, train=True)

valSet = cocoJson(myImagesVal)
createJson(valSet, train=False)

# Download the images of both splits with a thread-based joblib pool
# (n_jobs=-1 presumably means "use all available workers" — see joblib docs).
Parallel(
    n_jobs=-1, prefer="threads")([delayed(downloadImagesToTrain)(img) for img in myImagesTrain])

Parallel(
    n_jobs=-1, prefer="threads")([delayed(downloadImagesToVal)(img) for img in myImagesVal])

print("\nfinish.")

loading annotations into memory...
Done (t=19.73s)
creating index...
index created!
100% [............................................................................] 236406 / 236406
finish.


# 2）将minicoco2017数据集转换为mindrecord数据集

In [13]:
"""FasterRcnn dataset"""
from __future__ import division

import os
import numpy as np
from numpy import random

import cv2
import mindspore as ms
import mindspore.dataset as de
from mindspore.mindrecord import FileWriter
from src.image_process import *


def preprocess_fn(image, box, is_training, config):
    """Preprocess one decoded image and its ground-truth boxes.

    Args:
        image: H x W x C image array. Channels 0 and 2 are swapped before
            further processing (assumed to be RGB -> BGR; TODO confirm
            against the decoder used upstream).
        box: 2-D array whose columns are read as
            ``[x1, y1, x2, y2, label, iscrowd]``.
        is_training: When true, random expand / flip augmentation is
            applied; otherwise only the test-time resize path is used.
        config: Object providing ``num_gts``, ``keep_ratio``,
            ``flip_ratio`` and ``expand_ratio`` (plus whatever the
            ``*_column`` helpers read from it).

    Returns:
        The tuple produced by ``transpose_column`` (imported from
        ``src.image_process``) for the processed sample.
    """

    def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
        """Test-time pipeline: resize/rescale, normalise, transpose."""
        image_shape = image_shape[:2]
        input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert

        if config.keep_ratio:
            input_data = rescale_column_test(*input_data, config=config)
        else:
            input_data = resize_column_test(*input_data, config=config)
        input_data = imnormalize_column(*input_data)

        output_data = transpose_column(*input_data)
        return output_data

    def _data_aug(image, box, is_training):
        """Data augmentation function."""
        pad_max_number = config.num_gts
        # Keep at most ``num_gts`` boxes so the padded arrays have a fixed size.
        if pad_max_number < box.shape[0]:
            box = box[:pad_max_number, :]
        # Swap channel 0 and channel 2.
        image_bgr = image.copy()
        image_bgr[:, :, 0] = image[:, :, 2]
        image_bgr[:, :, 1] = image[:, :, 1]
        image_bgr[:, :, 2] = image[:, :, 0]
        image_shape = image_bgr.shape[:2]
        gt_box = box[:, :4]
        gt_label = box[:, 4]
        gt_iscrowd = box[:, 5]

        # Pad every per-box array up to ``num_gts`` rows. Padding rows get
        # label -1 and iscrowd 1, so they end up as 0 in the inverted mask.
        gt_box_new = np.pad(gt_box, ((0, pad_max_number - box.shape[0]), (0, 0)), mode="constant", constant_values=0)
        gt_label_new = np.pad(gt_label, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=-1)
        gt_iscrowd_new = np.pad(gt_iscrowd, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=1)
        # ``np.bool`` was removed in NumPy 1.24; ``np.bool_`` is the scalar
        # type that exists in every NumPy release.
        gt_iscrowd_new_revert = (~(gt_iscrowd_new.astype(np.bool_))).astype(np.int32)

        if not is_training:
            return _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert)

        # Draw the random augmentation decisions once per sample.
        flip = (np.random.rand() < config.flip_ratio)
        expand = (np.random.rand() < config.expand_ratio)
        input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert

        if expand:
            input_data = expand_column(*input_data)
        if config.keep_ratio:
            input_data = rescale_column(*input_data, config=config)
        else:
            input_data = resize_column(*input_data, config=config)
        input_data = imnormalize_column(*input_data)
        if flip:
            input_data = flip_column(*input_data)

        output_data = transpose_column(*input_data)
        return output_data

    return _data_aug(image, box, is_training)


def create_coco_label(is_training, config):
    """Collect image paths and box annotations from a COCO annotation file.

    Args:
        is_training: Selects the training split (``config.train_data_type``,
            optional ``config.train_set``) or the validation split
            (``config.val_data_type``, optional ``config.val_set``).
        config: Object providing ``coco_root``, ``val_data_type``,
            ``train_data_type``, ``coco_classes`` and ``instance_set``.

    Returns:
        ``(image_files, image_anno_dict)``: the list of image paths and a
        dict mapping each path to an array with one row
        ``[x1, y1, x2, y2, class_index, iscrowd]`` per kept annotation.
        Images without a kept annotation map to ``[0, 0, 0, 0, 0, 1]``.
    """
    from pycocotools.coco import COCO

    coco_root = config.coco_root
    val_type = config.val_data_type
    data_type = config.train_data_type if is_training else val_type

    # Class name -> index in ``config.coco_classes``.
    train_cls = config.coco_classes
    train_cls_dict = {cls: idx for idx, cls in enumerate(train_cls)}

    # Default annotation path; an explicit train_set / val_set overrides it.
    anno_json = os.path.join('.', coco_root, config.instance_set.format(data_type))
    if is_training and hasattr(config, 'train_set'):
        anno_json = os.path.join(coco_root, config.train_set)
    if not is_training and hasattr(config, 'val_set'):
        anno_json = os.path.join(coco_root, config.val_set)

    # Index the annotation file and map every category id to its name.
    coco = COCO(anno_json)
    classs_dict = {cat["id"]: cat["name"] for cat in coco.loadCats(coco.getCatIds())}

    image_files = []
    image_anno_dict = {}

    for img_id in coco.getImgIds():
        file_name = coco.loadImgs(img_id)[0]["file_name"]
        labels = coco.loadAnns(coco.getAnnIds(imgIds=img_id, iscrowd=None))
        image_path = os.path.join(coco_root, data_type, file_name)

        rows = []
        for label in labels:
            bbox = label["bbox"]
            class_name = classs_dict[label["category_id"]]
            if class_name not in train_cls:
                continue
            # COCO boxes are [x, y, width, height]; store corner coordinates.
            left, top = bbox[0], bbox[1]
            right, bottom = left + bbox[2], top + bbox[3]
            rows.append([left, top, right, bottom, train_cls_dict[class_name], int(label["iscrowd"])])

        image_files.append(image_path)
        # Placeholder row (iscrowd = 1) for images with no kept annotation.
        image_anno_dict[image_path] = np.array(rows) if rows else np.array([0, 0, 0, 0, 0, 1])

    return image_files, image_anno_dict

# Possibly used for evaluation.
def parse_json_annos_from_txt(anno_file, config):
    """Parse a user-defined annotation text file into COCO-style JSON data.

    Each non-blank line has the form
    ``<file_name> x1,y1,x2,y2,category,iscrowd [x1,y1,...] ...``.
    Fields may be separated by any amount of whitespace, and blank lines are
    ignored (they neither create an image entry nor consume an image id).

    Args:
        anno_file: Path of the annotation text file.
        config: Object providing ``coco_classes`` (list of class names).

    Returns:
        Dict with ``"images"``, ``"annotations"`` and ``"categories"``.
        Image and annotation ids start at 1. Boxes are stored as
        ``[x, y, w, h]``.

    Raises:
        RuntimeError: If ``anno_file`` is not an existing file.
    """
    if not os.path.isfile(anno_file):
        raise RuntimeError("Evaluation annotation file {} is not valid.".format(anno_file))

    annos = {
        "images": [],
        "annotations": [],
        # Category id is the index of the class in config.coco_classes.
        "categories": [{"id": i, "name": cls_name}
                       for i, cls_name in enumerate(config.coco_classes)],
    }

    with open(anno_file, "rb") as f:
        lines = f.readlines()

    img_id = 1
    anno_id = 1
    for line in lines:
        # split() without an argument collapses runs of whitespace, so a
        # doubled space no longer yields an empty annotation field.
        fields = line.decode("utf-8").split()
        if not fields:
            continue
        annos["images"].append({"file_name": fields[0], "id": img_id})
        for anno_info in fields[1:]:
            anno = anno_info.split(",")
            x = float(anno[0])
            y = float(anno[1])
            # The text file stores corners; COCO stores width / height.
            w = float(anno[2]) - x
            h = float(anno[3]) - y
            annos["annotations"].append({"bbox": [x, y, w, h],
                                         "area": w * h,
                                         "category_id": int(anno[4]),
                                         "iscrowd": int(anno[5]),
                                         "image_id": img_id,
                                         "id": anno_id})
            anno_id += 1
        img_id += 1

    return annos


def create_train_data_from_txt(image_dir, anno_path):
    """Collect images that are listed in ``anno_path`` and exist in ``image_dir``.

    Each non-blank line of the annotation file has the form
    ``<file_name> x1,y1,x2,y2,class_id,iscrowd [x1,y1,...] ...``.
    Fields may be separated by any amount of whitespace and blank lines are
    ignored. Lines whose image file does not exist are skipped.

    Args:
        image_dir: Directory containing the image files.
        anno_path: Path of the annotation text file.

    Returns:
        ``(image_files, image_anno_dict)``: the list of existing image paths
        and a dict mapping each path to a list of
        ``[xmin, ymin, xmax, ymax, cls_id, iscrowd]`` rows.

    Raises:
        RuntimeError: If ``image_dir`` is not a directory or ``anno_path``
            is not a file.
    """

    def anno_parser(annos_str):
        """Parse annotation from string to list."""
        annos = []
        for anno_str in annos_str:
            anno = anno_str.strip().split(",")
            xmin, ymin, xmax, ymax = list(map(float, anno[:4]))
            cls_id = int(anno[4])
            iscrowd = int(anno[5])
            annos.append([xmin, ymin, xmax, ymax, cls_id, iscrowd])
        return annos

    image_files = []
    image_anno_dict = {}
    if not os.path.isdir(image_dir):
        raise RuntimeError("Path given is not valid.")
    if not os.path.isfile(anno_path):
        raise RuntimeError("Annotation file is not valid.")

    with open(anno_path, "rb") as f:
        lines = f.readlines()
    for line in lines:
        # split() without an argument collapses runs of whitespace, so a
        # doubled space no longer produces an empty annotation field that
        # would crash anno_parser.
        fields = line.decode("utf-8").split()
        if not fields:
            continue
        image_path = os.path.join(image_dir, fields[0])
        if os.path.isfile(image_path):
            image_anno_dict[image_path] = anno_parser(fields[1:])
            image_files.append(image_path)
    return image_files, image_anno_dict


def data_to_mindrecord_byte_image(config, dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=1):
    """Write the dataset as MindRecord file(s).

    Args:
        config: Object providing ``mindrecord_dir`` and the fields the
            label readers need (``image_dir`` / ``anno_path`` for non-COCO
            data).
        dataset: ``"coco"`` reads labels with ``create_coco_label``; any
            other value uses ``create_train_data_from_txt``.
        is_training: Forwarded to ``create_coco_label``.
        prefix: File name (prefix) of the MindRecord output inside
            ``config.mindrecord_dir``.
        file_num: Forwarded to ``FileWriter`` as its second argument.
    """
    writer = FileWriter(os.path.join(config.mindrecord_dir, prefix), file_num)

    if dataset != "coco":
        image_files, image_anno_dict = create_train_data_from_txt(config.image_dir, config.anno_path)
    else:
        image_files, image_anno_dict = create_coco_label(is_training, config=config)

    # One record = raw encoded image bytes + an int32 annotation table
    # with 6 columns per box.
    schema = {
        "image": {"type": "bytes"},
        "annotation": {"type": "int32", "shape": [-1, 6]},
    }
    writer.add_schema(schema, "fasterrcnn_json")

    for image_name in image_files:
        with open(image_name, 'rb') as f:
            raw_bytes = f.read()
        boxes = np.array(image_anno_dict[image_name], dtype=np.int32)
        writer.write_raw_data([{"image": raw_bytes, "annotation": boxes}])
    writer.commit()


def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num=1, rank_id=0, is_training=True,
                              num_parallel_workers=8, python_multiprocessing=False):
    """Create FasterRcnn dataset with MindDataset.

    Args:
        config: Configuration object forwarded to ``preprocess_fn``.
        mindrecord_file: MindRecord file(s) to read.
        batch_size: Samples per batch; incomplete batches are dropped.
        device_num: Number of shards the dataset is split into.
        rank_id: Shard index read by this process.
        is_training: Enables shuffling and the training preprocessing path.
        num_parallel_workers: Workers used by the preprocessing map
            (the MindDataset reader itself always uses 4).
        python_multiprocessing: Passed to the preprocessing map only when
            ``is_training`` is true.

    Returns:
        The batched dataset with columns
        ``image, image_shape, box, label, valid_num``.
    """
    cv2.setNumThreads(0)
    de.config.set_prefetch_size(1)

    ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num,
                        shard_id=rank_id, num_parallel_workers=4, shuffle=is_training)
    ds = ds.map(input_columns=["image"], operations=ms.dataset.vision.Decode())

    # Arguments shared by the training and evaluation preprocessing map.
    map_kwargs = {
        "input_columns": ["image", "annotation"],
        "output_columns": ["image", "image_shape", "box", "label", "valid_num"],
        "column_order": ["image", "image_shape", "box", "label", "valid_num"],
        "operations": (lambda image, annotation: preprocess_fn(image, annotation, is_training, config=config)),
        "num_parallel_workers": num_parallel_workers,
    }
    # Only the training map receives the python_multiprocessing setting.
    if is_training:
        map_kwargs["python_multiprocessing"] = python_multiprocessing

    ds = ds.map(**map_kwargs)
    return ds.batch(batch_size, drop_remainder=True)


# 生成anchor
在Faster-RCNN网络的RPN阶段中，需要根据backbone网络抽取的feature map的大小，对每个点生成相应的anchor。所谓anchor，实际上就是一组矩形框，它们的大小、尺寸、位置坐标由base_size、scales、ratios和featmap_size决定。对于一个由ResNet50提取的特征图feature map，生成anchor的大致流程如下：
1) 首先有个base_size，指定生成的基础anchor的大小，生成的基础anchor的长和宽都是base_size，这时候只有一个anchor；

2) 以base_size的大小为基础，按照三种长宽比ratios{2:1, 1:1, 1:2}，生成指定长宽比的基础anchor；

3) 根据指定的缩放比例，对基础anchor进行缩放，本案例中缩放比例一共有1种，缩放比例为8，三种长宽比和一种缩放比例，就得到了3 * 1 = 3个基础anchor；

4) 上面的过程描述了特征图上一个cell对应的anchor的生成过程，对于特征图上的每个cell，都要生成3个anchor。在本案例中，提供了一个stride参数，用于将feature map上每个cell的位置换算为原图上的坐标偏移，从而把基础anchor平移到原图中对应的位置。

In [1]:
import numpy as np
class AnchorGenerator():
    """Generate anchor boxes ``[x1, y1, x2, y2]`` for a feature map.

    Args:
        base_size: Side length of the square base anchor.
        scales: Scale factors applied to the base anchor.
        ratios: Height/width ratios applied to the base anchor.
    """

    def __init__(self, base_size, scales, ratios):
        self.base_size = base_size
        self.scales = np.array(scales)
        self.ratios = np.array(ratios)
        self.base_anchors = self.gen_base_anchors()

    def gen_base_anchors(self):
        """Return the anchors of a single feature-map cell.

        One row per (ratio, scale) pair, rounded to whole numbers.
        """
        side = self.base_size
        # The base anchor is square, so both centre coordinates coincide.
        center = 0.5 * (side - 1)
        h_ratios = np.sqrt(self.ratios)
        w_ratios = 1 / h_ratios
        scales_row = self.scales[None, :]
        widths = (side * w_ratios[:, None] * scales_row).reshape(-1)
        heights = (side * h_ratios[:, None] * scales_row).reshape(-1)
        half_w = 0.5 * (widths - 1)
        half_h = 0.5 * (heights - 1)
        corners = [center - half_w, center - half_h,
                   center + half_w, center + half_h]
        return np.stack(corners, axis=-1).round()

    def _meshgrid(self, x, y, row_major=True):
        """Return flattened grid coordinates for every (y, x) combination.

        ``x`` cycles fastest. With ``row_major=False`` the two outputs are
        swapped.
        """
        n_x, n_y = len(x), len(y)
        xx = np.broadcast_to(x.reshape(1, n_x), (n_y, n_x)).reshape(-1)
        yy = np.repeat(y, n_x)
        return (xx, yy) if row_major else (yy, xx)

    def grid_anchors(self, featmap_size, stride=16):
        """Return all anchors of a feature map as an ``(N, 4)`` array.

        Args:
            featmap_size: ``(height, width)`` of the feature map.
            stride: Step between neighbouring cells, in anchor coordinates.
        """
        feat_h, feat_w = featmap_size
        cols = np.arange(0, feat_w) * stride
        rows = np.arange(0, feat_h) * stride
        grid_x, grid_y = self._meshgrid(cols, rows)
        # Each cell shifts both corners of every base anchor by its offset.
        offsets = np.stack([grid_x, grid_y, grid_x, grid_y], axis=-1)
        offsets = offsets.astype(self.base_anchors.dtype)
        shifted = self.base_anchors[None, :, :] + offsets[:, None, :]
        return shifted.reshape(-1, 4)

In [7]:
# Demo: generate the anchors of a 192x320 feature map with stride 4,
# using one scale and three aspect ratios (3 anchors per cell).
base_size = 4
scales = [8]
ratios = [0.5, 1.0, 2.0]
featmap_size = [192, 320]
stride = 4
x = AnchorGenerator(base_size, scales, ratios)
all_anchors = x.grid_anchors(featmap_size, stride)
print(f"For {featmap_size[0]}*{featmap_size[1]} feature map, the number of generated anchors is {len(all_anchors)}.")
print(f"Generated anchors as follow:")
print(all_anchors)

For 192*320 feature map, the number of generated anchors is 184320.
Generated anchors as follow:
[[ -21.   -9.   24.   12.]
 [ -14.  -14.   17.   17.]
 [  -9.  -21.   12.   24.]
 ...
 [1255.  755. 1300.  776.]
 [1262.  750. 1293.  781.]
 [1267.  743. 1288.  788.]]


# ResNet50 backbone

In [11]:
import numpy as np
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
import mindspore.ops as ops
import mindspore as ms

def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'):
    """Build a bias-free ``nn.Conv2d`` whose weights all start at 0.01.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Side length of the square kernel.
        stride: Convolution stride.
        padding: Padding forwarded to ``nn.Conv2d``.
        pad_mode: Padding mode forwarded to ``nn.Conv2d``.
    """
    weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
    init_weights = Tensor(np.full(weight_shape, 0.01, dtype=np.float32))
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        pad_mode=pad_mode,
        weight_init=init_weights,
        has_bias=False,
    )
def _BatchNorm2dInit(out_chls, momentum=0.1, affine=True, use_batch_statistics=True):
    """Build an ``nn.BatchNorm2d`` with identity-like initial statistics.

    gamma and the moving variance start at 1; beta and the moving mean
    start at 0. All are float32.

    Args:
        out_chls: Number of channels.
        momentum: Forwarded to ``nn.BatchNorm2d``.
        affine: Forwarded to ``nn.BatchNorm2d``.
        use_batch_statistics: Forwarded to ``nn.BatchNorm2d``.
    """
    def _ones():
        return Tensor(np.ones(out_chls, dtype=np.float32))

    def _zeros():
        return Tensor(np.zeros(out_chls, dtype=np.float32))

    # Each initialiser gets its own tensor, as in a hand-written version.
    gamma_init = _ones()
    beta_init = _zeros()
    moving_mean_init = _zeros()
    moving_var_init = _ones()
    return nn.BatchNorm2d(
        out_chls,
        momentum=momentum,
        affine=affine,
        gamma_init=gamma_init,
        beta_init=beta_init,
        moving_mean_init=moving_mean_init,
        moving_var_init=moving_var_init,
        use_batch_statistics=use_batch_statistics,
    )
class ResNetFea(nn.Cell):
    """ResNet feature extractor that returns the outputs of its four stages.

    Args:
        block: Residual block class used to build every stage.
        layer_nums: Number of blocks in each of the four stages.
        in_channels: Input channel count of each stage.
        out_channels: Output channel count of each stage.
        weights_update: Whether the stem convolution and stage 1 are
            trainable. Stages 2-4 are always built with
            ``weights_update=True``.
    """
    def __init__(self, block, layer_nums, in_channels, out_channels, weights_update=False):
        super(ResNetFea, self).__init__()
        # Original note: "whether a layer's weights are updated during
        # training". In this code the flag is passed to BatchNorm as
        # use_batch_statistics (and as affine for bn1) and to the blocks
        # as ``training``.
        bn_training = False
        self.conv1 = _conv(3, 64, kernel_size=7, stride=2, padding=3, pad_mode='pad')
        self.bn1 = _BatchNorm2dInit(64, affine=bn_training, use_batch_statistics=bn_training)
        self.relu = ops.ReLU()
        self.maxpool = ops.MaxPool(kernel_size=3, strides=2, pad_mode="SAME")
        self.weights_update = weights_update
        
        # Freeze the stem convolution unless weight updates are requested.
        if not self.weights_update:
            self.conv1.weight.requires_grad = False
        
        # Stage 1 keeps the spatial size (stride 1); stages 2-4 halve it.
        self.layer1 = self._make_layer(block, 
                                       layer_nums[0], 
                                       in_channel=in_channels[0], 
                                       out_channel=out_channels[0], 
                                       stride=1, 
                                       training=bn_training, 
                                       weights_update=self.weights_update)
        self.layer2 = self._make_layer(block, 
                                       layer_nums[1], 
                                       in_channel=in_channels[1], 
                                       out_channel=out_channels[1], 
                                       stride=2, 
                                       training=bn_training, 
                                       weights_update=True)
        self.layer3 = self._make_layer(block, 
                                       layer_nums[2], 
                                       in_channel=in_channels[2], 
                                       out_channel=out_channels[2], 
                                       stride=2, 
                                       training=bn_training, 
                                       weights_update=True)
        self.layer4 = self._make_layer(block, 
                                       layer_nums[3], 
                                       in_channel=in_channels[3], 
                                       out_channel=out_channels[3], 
                                       stride=2, 
                                       training=bn_training, 
                                       weights_update=True)
    def _make_layer(self, block, layer_num, in_channel, out_channel, stride, training=False, weights_update=False):
        """Stack ``layer_num`` blocks into one sequential stage.

        Only the first block changes the channel count / stride; it gets a
        down-sample branch whenever its input and output shapes differ.
        """
        layers = []
        down_sample = False
        if stride != 1 or in_channel != out_channel:
            down_sample = True
        resblk = block(in_channel, out_channel, stride=stride, down_sample=down_sample, training=training, weights_update=weights_update)
        layers.append(resblk)
        
        # Remaining blocks keep the channel count and use stride 1.
        for _ in range(1, layer_num):
            resblk = block(out_channel, out_channel, stride=1, training=training, weights_update=weights_update)
            layers.append(resblk)
        
        return nn.SequentialCell(layers)
    def construct(self, x):
        """Run the backbone and return the four stage outputs as a tuple."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        c1 = self.maxpool(x)
        
        c2 = self.layer1(c1)
        identity = c2
        # With frozen early layers, cut the gradient at the stage-1 output.
        if not self.weights_update:
            identity = ops.stop_gradient(c2)
        c3 = self.layer2(identity)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        return identity, c3, c4, c5
class ResidualBlock(nn.Cell):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip path.

    Args:
        in_channels: Input channel count.
        out_channels: Output channel count; the inner convolutions use
            ``out_channels // expansion`` channels.
        stride: Stride of the 3x3 convolution and of the down-sample branch.
        down_sample: Whether the skip path goes through a 1x1 convolution
            and BatchNorm.
        momentum: BatchNorm momentum.
        training: Passed to BatchNorm as ``use_batch_statistics``; when true
            the BatchNorm cells are also switched to train mode.
        weights_update: Whether the convolution weights are trainable; also
            used as the BatchNorm ``affine`` flag.
    """
    # Ratio between the block's output channels and its inner channels.
    expansion = 4
    def __init__(self, in_channels, out_channels,stride=1,down_sample=False,momentum=0.1,training=False, weights_update=False):
        super(ResidualBlock, self).__init__()
        self.affine = weights_update
        
        out_chls = out_channels // self.expansion
        self.conv1 = _conv(in_channels, out_chls, kernel_size=1, stride=1, padding=0)
        self.bn1 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training)
        self.conv2 = _conv(out_chls, out_chls, kernel_size=3, stride=stride, padding=1)
        self.bn2 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training)
        self.conv3 = _conv(out_chls, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn3 = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine, use_batch_statistics=training)
        
        if training:
            self.bn1 = self.bn1.set_train()
            self.bn2 = self.bn2.set_train()
            self.bn3 = self.bn3.set_train()
        
        # Freeze the convolution weights when updates are disabled.
        if not weights_update:
            self.conv1.weight.requires_grad = False
            self.conv2.weight.requires_grad = False
            self.conv3.weight.requires_grad = False
        
        self.relu = ops.ReLU()
        self.downsample = down_sample
        if self.downsample:
            # Projection for the skip path so it matches the main path.
            self.conv_down_sample = _conv(in_channels, out_channels, kernel_size=1, stride=stride, padding=0)
            self.bn_down_sample = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine,
                                                   use_batch_statistics=training)
            if training:
                self.bn_down_sample = self.bn_down_sample.set_train()
            if not weights_update:
                self.conv_down_sample.weight.requires_grad = False
        self.add = ops.Add()
    def construct(self, x):
        """Apply the three conv/BN stages, add the skip path, then ReLU."""
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        
        out = self.conv3(out)
        out = self.bn3(out)
        
        if self.downsample:
            identity = self.conv_down_sample(identity)
            identity = self.bn_down_sample(identity)
        
        out = self.add(out, identity)
        out = self.relu(out)
        
        return out

In [16]:
# Smoke test: run a random 1x3x224x224 input through a ResNet50-shaped
# backbone and print the shape of each returned feature map.
resnet = ResNetFea(ResidualBlock, [3, 4, 6, 3],[64, 256, 512, 1024],[256, 512, 1024, 2048],False)
x = Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
x = resnet(x)
for i in range(len(x)):
    print(x[i].shape)

(1, 256, 56, 56)
(1, 512, 28, 28)
(1, 1024, 14, 14)
(1, 2048, 7, 7)


# Faster-RCNN主干网络

In [None]:
import mindspore.nn as nn
import numpy as np
class Faster_Rcnn(nn.Cell):
    """Faster R-CNN network skeleton: anchor generation plus ResNet backbone.

    ``construct`` is still a placeholder. The class currently only prepares
    the multi-level anchors and the backbone.
    """
    def __init__(self):
        # nn.Cell must be initialised before any attribute (in particular
        # any child Cell such as the backbone) is assigned.
        super(Faster_Rcnn, self).__init__()
        self.dtype = np.float32
        self.ms_type = ms.float32
        self.train_batch_size = 2  # batch size used for training
        self.without_bg_loss = True  # NOTE(review): purpose not visible here — confirm
        self.num_classes = 4  # number of predicted classes, background included
        self.num_cls_bbox = 3  # number of per-class boxes; no box for background
        self.anchor_scales = [8]  # anchor scales
        self.anchor_ratios = [0.5, 1.0, 2.0]  # anchor aspect ratios
        # One stride per feature level; also reused as the anchor base sizes.
        self.anchor_strides = [4, 8, 16, 32, 64]
        # Presumably the mean / std used to normalise box regression
        # targets — TODO confirm once the heads are implemented.
        self.target_means = (0., 0., 0., 0.)
        self.target_stds = (0.1, 0.1, 0.2, 0.2)
        
        # Create one anchor generator per feature level.
        self.anchor_base_sizes = list(self.anchor_strides)
        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios))
        self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
        featmap_sizes = [[192, 320], [96, 160], [48, 80], [24, 40], [12, 20]]
        assert len(featmap_sizes) == len(self.anchor_generators)
        self.anchor_list = self.get_anchors(featmap_sizes)
        
        # ResNet backbone
        self.backbone = ResNetFea(ResidualBlock, [3,4,6,3], [64,256,512,1024], [256,512,1024,2048])
    def construct(self,):
        pass
    
    def get_anchors(self, featmap_sizes):
        """Return a tuple with one anchor Tensor per feature level.

        Args:
            featmap_sizes: ``[height, width]`` of every feature level, in
                the same order as ``self.anchor_generators``.
        """
        num_levels = len(featmap_sizes)
        multi_level_anchors = ()
        for i in range(num_levels):
            anchors = self.anchor_generators[i].grid_anchors(featmap_sizes[i], self.anchor_strides[i])
            multi_level_anchors += (Tensor(anchors.astype(self.dtype)),)
        
        return multi_level_anchors