In [None]:
import mxnet as mx
from mxnet import gluon,autograd,contrib,image,nd
from mxnet.gluon import data as gdata,loss as gloss,nn
import gluoncv
import matplotlib as mpl
import matplotlib.pyplot as plt
import sys
import time
import numpy as np

## 定义RPN网络


- 生成默认锚框
- 两个1x1卷积预测层
- 对生成的默认锚框进行采样

In [None]:
# Scratch input: a 2x2 NDArray.
x = nd.array([[1,2],[3,4]])

In [None]:
# Scratch input: a 2x3 NDArray.
y = nd.array([[5,6,8],[7,8,7]])

In [None]:
# NOTE(review): x is 2x2 while y is 2x3; nd.stack presumably needs inputs of
# identical shape, so this cell likely raises — confirm the intent.
z = nd.stack(x,y,axis = 0)

In [None]:
# Toy grid: 5 columns, 5 rows, 2 pixels between neighbouring positions.
width, height, stride = 5, 5, 2
# Pixel offsets of every column / row: 0, stride, 2 * stride, ...
offset_x = np.arange(width) * stride
offset_y = np.arange(height) * stride

In [None]:
# Expand the 1-D offsets into a pair of 2-D coordinate grids.
# NOTE(review): the cell rebinds offset_x/offset_y, so running it a second
# time passes the 2-D grids back into np.meshgrid; re-run the cell that
# defines the 1-D offsets first.
offset_x, offset_y = np.meshgrid(offset_x, offset_y)

In [None]:
# Show the x grid flattened to 1-D.
offset_x.ravel()

In [None]:
# One (x, y, x, y) row per grid position, ready to be added to box corners.
offsets = np.stack(
    [grid.ravel() for grid in (offset_x, offset_y, offset_x, offset_y)],
    axis=1,
)

In [None]:
# Four columns come from the axis=1 stack; one row per flattened grid entry.
offsets.shape

In [None]:
"""RPN网络 anchors"""

import numpy as np
from mxnet import gluon
from mxnet import nd


class RPNAnchorGenerator(gluon.Block):
    """Pre-generate RPN anchor boxes for a feature map and crop them on demand.

    Parameters
    ----------
    stride : int
        Distance, in input-image pixels, between neighbouring anchor
        positions (usually the image-to-feature-map size ratio).
    base_size : int
        Side length of the reference box; must be non-zero.
    ratios : number or list/tuple of numbers
        Height-to-width aspect ratios.
    scales : number or list/tuple of numbers
        Multipliers applied to the reference box.
    alloc_size : (int, int)
        ``(H, W)`` of the largest feature map to allocate anchors for.
        Smaller maps are served by slicing the pre-computed table.

    Notes
    -----
    For ratio ``r`` and scale ``s`` a box is roughly
    ``base_size * s / sqrt(r)`` wide and ``base_size * s * sqrt(r)`` high.
    """

    def __init__(self, stride, base_size, ratios, scales, alloc_size, **kwargs):
        super(RPNAnchorGenerator, self).__init__(**kwargs)
        if not base_size:
            raise ValueError("Invalid base_size: {}".format(base_size))
        # Accept a bare number for either argument.
        if not isinstance(ratios, (tuple, list)):
            ratios = [ratios]
        if not isinstance(scales, (tuple, list)):
            scales = [scales]

        # Number of anchors attached to every feature-map position.
        self._num_depth = len(ratios) * len(scales)
        # Build the whole anchor table once and keep it as a constant.
        anchors = self._generate_anchors(stride, base_size, ratios, scales, alloc_size)
        self.anchors = self.params.get_constant('anchor_', anchors)

    def _generate_anchors(self, stride, base_size, ratios, scales, alloc_size):
        """Build the anchor table for an ``alloc_size`` feature map.

        Returns
        -------
        numpy.ndarray
            float32 array of shape ``(1, 1, H, W, 4 * N)`` where
            ``N = len(ratios) * len(scales)``; every group of four values is
            ``(xmin, ymin, xmax, ymax)`` in input-image pixels.
        """
        # Centre of the reference box placed at the origin.
        px, py = (base_size - 1) * 0.5, (base_size - 1) * 0.5
        base_sizes = []
        for r in ratios:
            for s in scales:
                size = base_size * base_size / r
                ws = np.round(np.sqrt(size))
                # Half extents of the scaled box around the centre.
                w = (ws * s - 1) * 0.5
                h = (np.round(ws * r) * s - 1) * 0.5
                base_sizes.append([px - w, py - h, px + w, py + h])
        # (N, 4): the anchor shapes shared by every position.
        base_sizes = np.array(base_sizes)

        # alloc_size is (H, W); unpack it in that order so that non-square
        # maps get the right number of rows and columns.
        height, width = alloc_size
        offset_x = np.arange(0, width * stride, stride)
        offset_y = np.arange(0, height * stride, stride)
        # meshgrid returns (H, W) grids: rows follow y, columns follow x.
        offset_x, offset_y = np.meshgrid(offset_x, offset_y)
        # (H * W, 4): the shift applied to (xmin, ymin, xmax, ymax).
        offset = np.stack((offset_x.ravel(), offset_y.ravel(),
                           offset_x.ravel(), offset_y.ravel()), axis=1)

        # Broadcast (1, N, 4) + (H * W, 1, 4) -> (H * W, N, 4).
        anchors = base_sizes.reshape((1, -1, 4)) + offset.reshape((-1, 1, 4))
        # Row-major (H, W) layout matches the (B, C, H, W) tensor that
        # forward() slices against on axes 2 and 3.
        anchors = anchors.reshape((1, 1, height, width, -1)).astype(np.float32)
        return anchors

    def forward(self, x):
        """Crop the pre-generated anchors to the spatial size of ``x``.

        Parameters
        ----------
        x : NDArray
            Tensor whose axes 2 and 3 give the feature-map height and width.

        Returns
        -------
        NDArray
            Anchors reshaped to ``(1, -1, 4)``.
        """
        # presumably the block must be initialized before .data() works — confirm
        anchors = self.anchors.data()
        # Only the shape of x matters; slice axes 2 and 3 to match it.
        a = nd.slice_like(anchors, x * 0, axes=(2, 3))
        return a.reshape((1, -1, 4))

In [None]:
# Demo configuration for the anchor generator.
stride = base_size = 16      # pixel step between positions / reference box side
ratios = [1, 2, 0.5]         # aspect ratios
scales = [1, 10, 20]         # size multipliers
alloc_size = (56, 56)        # pre-allocated feature-map size

In [None]:
# Build the anchor generator from the demo configuration.
# The `%pdb` toggle was removed: with it enabled, any later exception opens an
# interactive debugger and stalls "Restart & Run All". Use `%debug` after a
# failure when post-mortem inspection is needed.
anchorge = RPNAnchorGenerator(stride,base_size,ratios,scales,alloc_size)

In [None]:
# Random tensor of shape (1, 3, 16, 16).
# NOTE(review): presumably meant as an input for anchorge(x) — confirm.
x = nd.random.uniform(shape=(1,3,16,16))

In [None]:
# Inspect the anchor table stored on the block.
# NOTE(review): presumably .value holds the constant's raw array — confirm.
anchorge.anchors.value

In [None]:
# Toy (10, 5) matrix; rebinds x.
# NOTE(review): other cells call nd.random.uniform — confirm nd.uniform exists
# in the installed MXNet version.
x = nd.uniform(shape=(10,5))

In [None]:
# Shape of the maximum taken along axis 1 (one value per row).
nd.max(x,axis = 1).shape

In [None]:
# Maximum along axis 0, with the reduced axis kept for broadcasting.
y = nd.max(x,axis = 0, keepdims=True)

In [None]:
# Flag entries that come within 1e-5 of their column maximum.
mask = nd.broadcast_greater(x+1e-5,y)

In [None]:
# Display the mask.
mask

In [None]:
"""Region Proposal 标注工具."""
from __future__ import absolute_import

import numpy as np
import mxnet as mx
from mxnet import gluon, nd
from mxnet import autograd


class RPNTargetSampler(gluon.Block):
    """Label anchors as positive, negative or ignored, then subsample them.

    Parameters
    ----------
    num_sample : int
        Total number of anchors (positive + negative) kept for RPN training.
    pos_iou_thresh : float
        Anchors whose best IoU is greater than this value become positive.
    neg_iou_thresh : float
        Anchors whose best IoU is less than this value become negative.
    pos_ratio : float
        Target share of positives; at most ``round(num_sample * pos_ratio)``
        positives are kept.
    """

    def __init__(self, num_sample, pos_iou_thresh, neg_iou_thresh, pos_ratio, **kwargs):
        super(RPNTargetSampler, self).__init__(**kwargs)
        self._pos_iou_thresh = pos_iou_thresh
        self._num_sample = num_sample
        self._neg_iou_thresh = neg_iou_thresh
        # Upper bound on the positives kept per call.
        self._max_pos = int(np.round(pos_ratio * num_sample))
        # float32 spacing at 1.0; makes "ious + eps > max" behave like ">= max".
        self._eps = np.spacing(np.float32(1.0))

    def forward(self, ious):
        """Assign a label and a matched ground truth to every anchor.

        Parameters
        ----------
        ious : NDArray
            ``(N, M)`` IoU table between N anchors and M ground-truth boxes.

        Returns
        -------
        samples : NDArray
            ``(N,)`` labels: 1 = positive, 0 = ignored, -1 = negative.
        matches : NDArray
            ``(N,)`` index in ``[0, M)`` of the ground truth with the highest
            IoU for each anchor.

        Notes
        -----
        Subsampling draws from NumPy's global random state, so results vary
        between calls unless that state is seeded.
        """
        matches = nd.argmax(ious, axis=-1)
        # Best IoU reached by each anchor.
        max_iou_pre_anchor = nd.max(ious, axis=-1)
        # Every anchor starts as 0 (ignored).
        samples = nd.zeros_like(max_iou_pre_anchor)

        # Best IoU reached for each ground truth, shape (1, M).
        max_all_ious = nd.max(ious, axis=0, keepdims=True)
        # Mark every anchor that attains a ground truth's best IoU.
        mask = nd.broadcast_greater(ious + self._eps, max_all_ious)
        mask = nd.sum(mask, axis=-1)
        # Those best-matching anchors become positive.
        samples = nd.where(mask, nd.ones_like(samples), samples)

        # Anchors above the positive threshold also become positive.
        samples = nd.where(max_iou_pre_anchor > self._pos_iou_thresh, nd.ones_like(samples), samples)

        # Anchors below the negative threshold become negative. The lower
        # bound is inclusive so that anchors with no overlap at all count as
        # background instead of being ignored (or left positive when a ground
        # truth overlaps no anchor).
        # NOTE(review): this runs after the positive assignment, so a
        # best-per-ground-truth anchor below neg_iou_thresh ends up negative.
        tmp = (max_iou_pre_anchor < self._neg_iou_thresh) * (max_iou_pre_anchor >= 0)

        samples = nd.where(tmp, nd.ones_like(samples) * -1, samples)
        # Subsampling is done in NumPy.
        samples = samples.asnumpy()
        # Drop surplus positives first.
        num_pos = int((samples > 0).sum())
        if num_pos > self._max_pos:
            discard_indices = np.random.choice(
                np.where((samples > 0))[0], size=(num_pos - self._max_pos), replace=False
            )
            samples[discard_indices] = 0  # surplus positives are ignored
        num_neg = int((samples < 0).sum())
        # Negatives fill whatever quota the positives left unused.
        max_neg = self._num_sample - min(self._max_pos, num_pos)

        if num_neg > max_neg:
            discard_indices = np.random.choice(
                np.where((samples < 0))[0], size=(num_neg - max_neg), replace=False
            )
            samples[discard_indices] = 0

        # Back to an NDArray on the same context as the matches.
        samples = nd.array(samples, ctx=matches.context)
        return samples, matches

In [None]:
# Demo configuration for the target sampler.
num_sample, pos_ratio = 10, 0.25             # anchors kept / share of positives
pos_iou_thresh, neg_iou_thresh = 0.7, 0.3    # positive / negative IoU cut-offs

In [None]:
# Build the sampler from the demo configuration.
sampler = RPNTargetSampler(num_sample,pos_iou_thresh,neg_iou_thresh,pos_ratio)

In [None]:
# Random (50, 5) stand-in for an IoU table: 50 anchors, 5 ground truths.
ious = nd.random.uniform(shape=(50,5))

In [None]:
# Run the sampler: sam holds the labels, mat the matched ground-truth indices.
sam,mat=sampler(ious)

In [None]:
# Display the sampled labels.
sam

In [None]:
# The unconditional pdb.set_trace() breakpoint was removed so that
# "Restart & Run All" no longer blocks on an interactive prompt. Use the
# %debug magic after a failure for post-mortem inspection.
import pdb