# RCNN的分类与回归目标

RCNN精确到具体类别的正负样本采样与训练数据的generator

In [1]:
import numpy as np
import tensorflow as tf
sess = tf.Session()

  from ._conv import register_converters as _register_converters


In [12]:
# 之前写的2维tensor的去除padding的操作
def padding_remove_tf(padding_tf):
    pad_tag = padding_tf[..., -1] # 原3维数组的话，这里返回2维数组
    start_pad = tf.where(tf.equal(pad_tag,0))[0][0]
    end_pad = tf.where(tf.equal(pad_tag,0))[-1][0]
    return padding_tf[start_pad:end_pad+1,:-1]

In [3]:
# 之前写的IOU的计算过程
def iou_tf(boxes_a, boxes_b):
    # numpy转tensor
#     boxes_a = tf.constant(boxes_a, dtype=tf.float32)
#     boxes_b = tf.constant(boxes_b, dtype=tf.float32)
    # 扩围
    boxes_a = tf.expand_dims(boxes_a, axis=1)
    boxes_b = tf.expand_dims(boxes_b, axis=0)
    # 两两计算overlap的长与宽，若不相交（负值）则取0
    overlap_h = tf.maximum(0.0, tf.minimum(boxes_a[...,2],boxes_b[...,2])-tf.maximum(boxes_a[...,0],boxes_b[...,0]))
    overlap_w = tf.maximum(0.0, tf.minimum(boxes_a[...,3],boxes_b[...,3])-tf.maximum(boxes_a[...,1],boxes_b[...,1]))
    # 计算交集
    overlap = overlap_h * overlap_w
    # 计算并集
    union = (boxes_a[...,2]-boxes_a[...,0]) * (boxes_a[...,3]-boxes_a[...,1]) + (boxes_b[...,2]-boxes_b[...,0]) * (boxes_b[...,3]-boxes_b[...,1]) - overlap
    # 求交并比
    iou = tf.divide(overlap, union, name='regress_target_iou')
    return iou

In [4]:
# 之前写的回归目标的计算过程
def regress_target_tf(anchors, gt_boxes):
    """
    计算回归目标
    :param anchors: [N,(y1,x1,y2,x2)]
    :param gt_boxes: [N,(y1,x1,y2,x2)]
    :return: [N,(dy, dx, dh, dw)]
    """
    # 高度和宽度
    h = anchors[:, 2] - anchors[:, 0]
    w = anchors[:, 3] - anchors[:, 1]
 
    gt_h = gt_boxes[:, 2] - gt_boxes[:, 0]
    gt_w = gt_boxes[:, 3] - gt_boxes[:, 1]
    # 中心点
    center_y = (anchors[:, 2] + anchors[:, 0]) * 0.5
    center_x = (anchors[:, 3] + anchors[:, 1]) * 0.5
    gt_center_y = (gt_boxes[:, 2] + gt_boxes[:, 0]) * 0.5
    gt_center_x = (gt_boxes[:, 3] + gt_boxes[:, 1]) * 0.5
 
    # 回归目标
    dy = (gt_center_y - center_y) / h
    dx = (gt_center_x - center_x) / w
    dh = tf.log(gt_h / h)
    dw = tf.log(gt_w / w)
 
    target = tf.stack([dy, dx, dh, dw], axis=1)
    target /= tf.constant([0.1, 0.1, 0.2, 0.2])
    # target = tf.where(tf.greater(target, 100.0), 100.0, target)
    return target

In [5]:
# 之前写的random_shuffle()函数，用来随机采样
def shuffle_sample(tensor_list, tensor_size, sample_size):
    sample_indices = tf.random_shuffle(tf.range(tensor_size))[:sample_size]
    return [tf.gather(tensor, sample_indices) for tensor in tensor_list]

In [None]:
# RCNN的分类、回归目标的确定
## 与RPN的目标寻找原理没多大差别，只需要注意这里的proposals与gt_boxes因为之前的回归，已经做了对应。
## 同时，这里的正负样本不是1：1的比例关系，而是正样本+负样本等于确定数量的关系。
def rcnn_targets_graph(gt_boxes, gt_class_ids, proposals, train_rois_per_image, roi_positive_ratio)
    """
    每个图像生成检测网络的分类和回归目标
    IoU>=0.5的为正样本；IoU<0.5的为负样本
    :param gt_boxes: GT 边框坐标 [MAX_GT_BOXs, (y1,x1,y2,x2,tag)] ,tag=0 为padding
    :param gt_class_ids: GT 类别 [MAX_GT_BOXs, 1+1] ;最后一位为tag, tag=0 为padding
    :param proposals: [N,(y1,x1,y2,x2,tag)] ,tag=0 为padding，其他tag=1
    :param train_rois_per_image: 每张图像训练的proposal数量
    :param roi_positive_ratio: proposal正负样本比
    :return:
    """
    # 首先去除gt_boxes、gt_class_ids、proposals的padding
    gt_boxes = padding_remove_tf(gt_boxes)
    gt_class_ids = padding_remove_tf(gt_class_ids)[...,0]
    proposals = padding_remove_tf(proposals)
    
    # 然后两两计算IOU
    iou = iou_tf(gt_boxes, proposals)
    
    # 开始找正样本proposals
    ## 第一类是找到每个gt对应iou值最大的proposals索引indice_1（不管iou的阈值情况）
    gt_iou_argmax = tf.argmax(iou, axis=1)
    positive_gt_1 = gt_boxes # 因为之前RPN已经确定每个GT至少生成有一个对应的proposal，所以GT不用再做筛选
    positive_proposals_idx_1 = gt_iou_argmax
    positive_cls_1 = gt_class_ids
    ## 第一类：确定GT框、GT框对应的cls、正样本proposals框3个指标
    gt_boxes_pos_1 = tf.identity(positive_gt_1)
    gt_cls_pos_1 = tf.identity(positive_cls_1)
    proposals_pos_1 = tf.gather(proposals, positive_proposals_idx_1)
    
    ## 第二类是找到每个proposals与其对应gt的最大iou值，并比较0.5的阈值情况，得到索引indice_2
    proposal_iou_max = tf.reduce_max(iou, axis=0)
    proposal_pos_idx_2 = tf.where(proposal_iou_max > 0.5) # iou值高于0.5正样本
    proposal_iou_argmax = tf.argmax(iou, axis=0)
    gt_pos_idx_2 = tf.gather_nd(proposal_iou_argmax, proposal_pos_idx_2) # 找到第二部分正样本proposals所对应的GT索引
    ## 第二类：确定GT框、GT框对应的cls、正样本proposals框3个指标
    gt_boxes_pos_2 = tf.gather(gt_boxes, gt_pos_idx_2)
    gt_cls_pos_2 = tf.gather(gt_class_ids, gt_pos_idx_2)
    proposals_pos_2 = tf.gather(proposals, proposal_pos_idx_2[...,0])
    
    # 合并目标
    gt_boxes_pos = tf.concat([gt_boxes_pos_1, gt_boxes_pos_2], axis=0)
    gt_cls_pos = tf.concat([gt_cls_pos_1, gt_cls_pos_2], axis=0)
    proposals_pos = tf.concat([proposals_pos_1, proposals_pos_2], axis=0)
    
    # 计算正样本的回归目标（因为只有正样本有边框回归过程）
    deltas_pos = regress_target_tf(proposals_pos, gt_boxes_pos)
    
    # 正样本随机采样
    proposal_pos_num = tf.minimum(tf.shape(proposals_pos)[0], tf.cast(train_rois_per_image*roi_positive_ratio, tf.int32))
    gt_boxes_pos, gt_cls_pos, deltas_pos = shuffle_sample([gt_boxes_pos, gt_cls_pos, deltas_pos], 
                                      tf.shape(proposal_pos)[0], 
                                      proposal_pos_num)
    
    # 开始找负样本proposals
    proposal_neg_idx = tf.where(proposal_iou_max < 0.5) # iou值低于0.5负样本
    ## 负样本索引随机采样
    proposal_neg_num = tf.minimum(train_rois_per_image-proposal_pos_num, tf.cast(train_rois_per_image*(1-roi_positive_ratio), tf.int32))
    proposal_neg_idx = tf.random_shuffle(proposal_neg_idx)[:proposal_neg_num]
    ## 采集负样本目标
    proposals_neg = tf.gather(proposals, proposal_neg_idx)
    deltas_neg = tf.zeros([proposal_neg_num, 4], dtype=float32)
    gt_cls_neg = tf.zeros([proposal_neg_num], dtype=uint8)
    
    # 合并正负样本
    proposals = tf.concat([proposals_pos, proposals_neg], axis=0)
    deltas = tf.concat([deltas_pos, deltas_neg], axis=0)
    class_ids = tf.concat([gt_cls_pos, gt_cls_neg], axis=0)
    
#     # 计算padding
#     class_ids, train_rois = tf_utils.pad_list_to_fixed_size(
#         [tf.expand_dims(class_ids, axis=1), train_rois], train_rois_per_image)  # class_ids分类扩一维
#     # 为后续处理方便负样本tag设置为-1
#     deltas = tf_utils.pad_to_fixed_size_with_negative(deltas, train_rois_per_image, negative_num=negative_num)
    
    return proposals, deltas, class_ids

In [None]:
# # RCNN的分类、回归目标的确定 （第2次写）
# def rcnn_target(proposals, gt_boxes, gt_cls_ids, train_rois_per_image, roi_positive_ratio):
#     """
#     """
#     # 首先对gt_boxes去padding操作
#     gt_boxes = padding_remove_tf(gt_boxes)
#     gt_cls_ids = padding_remove_tf(gt_cls_ids)
#     # 计算proposals与gt_boxes之间的IOU关系
#     iou = iou_tf(gt_boxes, proposals)
#     # 根据iou关系找到所有GT对应的IOU值最高的第1波正样本
#     proposal_iou_argmax = tf.argmax(iou, axis=1) # axis=0按列计算，axis=1按行计算
#     pos_indices_1 = proposal_iou_argmax
#     #======================================================================
#     gt_boxes_1 = gt_boxes
#     gt_cls_ids_1 = gt_cls_ids
#     proposal_pos_1 = tf.gather(proposals, pos_indices_1)
#     # 根据0.5的阈值关系，找到第2波正样本
#     proposal_iou_max = tf.reduce_max(iou, axis=0)
#     pos_indices_2 = tf.where(proposal_iou_max > 0.5)
#     gt_iou_argmax = tf.argmax(iou, axis=0)
#     gt_pos_indices = tf.gather_nd(gt_iou_argmax, pos_indices_2) # 找到第二波正样本对应的GT
#     #======================================================================
#     gt_boxes_2 = tf.gather(gt_boxes, gt_pos_indices)
#     gt_cls_ids_2 = tf.gather(gt_cls_idx, gt_pos_indices)
#     proposal_pos_2 = tf.gather(proposals, pos_indices_2)
#     # 正样本合并汇总
#     gt_boxes_pos = tf.concat([gt_boxes_1, gt_boxes_2], axis=0)
#     gt_cls_ids_pos = tf.concat([gt_cls_ids_1, gt_cls_ids_2], axis=0)
#     proposal_pos = tf.concat([proposal_pos_1, proposal_pos_2], axis=0)
#     # 正样本回归目标的计算
#     deltas_pos = regress_target_tf(proposal_pos, gt_boxes_pos)
#     # 正样本启发式采样
#     pos_num = tf.minimum(train_rois_per_image*roi_positive_ratio, tf.shape(proposal_pos)[0])
#     deltas_pos, gt_cls_ids_pos, proposal_pos = shuffle_sample([gt_boxes_pos, gt_cls_ids_pos, proposal_pos],
#                                                                  tf.shape(proposal_pos)[0],
#                                                                  pos_num)
    
#     # 找到负样本
#     neg_indices = tf.where(proposal_iou_max < 0.5)
#     # 负样本gather
#     proposal_neg = tf.gather(proposals, neg_indices)
#     neg_num = tf.minimum(train_rois_per_image*(1-roi_positive_ratio), tf.shape(proposal_neg)[0])
#     proposal_neg = random_shuffle(proposal_neg, tf.range(proposal_neg))[:neg_num]
#     deltas_neg =tf.zeros([neg_num, 4])
#     gt_cls_ids_neg = tf.zeros([neg_num])
    
#     # 正负样本合并
#     deltas = tf.concat([deltas_pos, deltas_neg], axis=0)
#     cls = tf.concat([gt_cls_ids_pos, gt_cls_ids_neg], axis=0)
#     proposals = tf.concat([proposal_pos, proposal_neg], axis=0)
    
#     return deltas, cls, proposals

In [66]:
#===============================参数设置==========================================
# 函数输出测试:1
proposals = tf.constant(np.array([[78,22,253,304],[269,19,360,296]]), dtype=tf.float32)
gt_boxes = tf.constant(np.array([[50,30,200,280,0],[280,10,370,320,0]]), dtype=tf.float32)
gt_class_ids = tf.constant(np.array([[1,0],[2,0]]), dtype=tf.int32)
train_rois_per_image = 2
roi_positive_ratio = 0.5

In [67]:
# RCNN分类、回归目标函数测试：1
gt_boxes = padding_remove_tf(gt_boxes)
gt_class_ids = padding_remove_tf(gt_class_ids)[...,0]
# proposals = padding_remove_tf(proposals)

# 然后两两计算IOU
iou = iou_tf(gt_boxes, proposals)
print(gt_boxes.eval(session=sess))
print(gt_class_ids.eval(session=sess))
print(iou.eval(session=sess))

[[ 50.  30. 200. 280.]
 [280.  10. 370. 320.]]
[1 2]
[[0.54126    0.        ]
 [0.         0.71606296]]


In [68]:
# RCNN分类、回归目标函数测试：2
## ================================正样本====================================
## 第一类是找到每个gt对应iou值最大的proposals索引indice_1（不管iou的阈值情况）
gt_iou_argmax = tf.argmax(iou, axis=1)
positive_gt_1 = gt_boxes # 因为之前RPN已经确定每个GT至少生成有一个对应的proposal，所以GT不用再做筛选
positive_proposals_idx_1 = gt_iou_argmax
positive_cls_1 = gt_class_ids
## 第一类：确定GT框、GT框对应的cls、正样本proposals框3个指标
gt_boxes_pos_1 = tf.identity(positive_gt_1)
gt_cls_pos_1 = tf.identity(positive_cls_1)
proposals_pos_1 = tf.gather(proposals, positive_proposals_idx_1)

## 第二类是找到每个proposals与其对应gt的最大iou值，并比较0.5的阈值情况，得到索引indice_2
proposal_iou_max = tf.reduce_max(iou, axis=0)
proposal_pos_idx_2 = tf.where(proposal_iou_max > 0.5) # iou值高于0.5正样本
proposal_iou_argmax = tf.argmax(iou, axis=0)
gt_pos_idx_2 = tf.gather_nd(proposal_iou_argmax, proposal_pos_idx_2) # 找到第二部分正样本proposals所对应的GT索引
## 第二类：确定GT框、GT框对应的cls、正样本proposals框3个指标
gt_boxes_pos_2 = tf.gather(gt_boxes, gt_pos_idx_2)
gt_cls_pos_2 = tf.gather(gt_class_ids, gt_pos_idx_2)
proposals_pos_2 = tf.gather(proposals, proposal_pos_idx_2[...,0])

# 合并目标
gt_boxes_pos = tf.concat([gt_boxes_pos_1, gt_boxes_pos_2], axis=0)
gt_cls_pos = tf.concat([gt_cls_pos_1, gt_cls_pos_2], axis=0)
proposals_pos = tf.concat([proposals_pos_1, proposals_pos_2], axis=0)

deltas_pos = regress_target_tf(proposals_pos, gt_boxes_pos)

# 正样本随机采样
proposal_pos_num = tf.minimum(tf.shape(proposals_pos)[0], tf.cast(train_rois_per_image*roi_positive_ratio, tf.int32))
proposals_pos, gt_cls_pos, deltas_pos = shuffle_sample([proposals_pos, gt_cls_pos, deltas_pos], 
                                  tf.shape(proposals_pos)[0], 
                                  proposal_pos_num)
print("2波正样本合并后随机采样对应的回归目标：\n{}".format(deltas_pos.eval(session=sess)))
print("2波正样本合并后随机采样对应的分类目标：\n{}".format(gt_cls_pos.eval(session=sess)))
print("2波正样本合并后随机采样的proposals框：\n{}".format(proposals_pos.eval(session=sess)))

2波正样本合并后随机采样对应的回归目标：
[[ 1.1538461   0.27075812 -0.05524918  0.562774  ]]
2波正样本合并后随机采样对应的分类目标：
[2]
2波正样本合并后随机采样的proposals框：
[[269.  19. 360. 296.]]


In [71]:
# RCNN分类、回归目标函数测试：3
## ================================负样本====================================
## 采集负样本目标
# 开始找负样本proposals
proposal_neg_idx = tf.where(proposal_iou_max < 0.5) # iou值低于0.6负样本
## 负样本索引随机采样
proposal_neg_num = tf.minimum(train_rois_per_image-proposal_pos_num, tf.cast(train_rois_per_image*(1-roi_positive_ratio), tf.int32))
proposal_neg_idx = tf.random_shuffle(proposal_neg_idx)[:proposal_neg_num]
proposals_neg = tf.gather(proposals, proposal_neg_idx[...,0])
deltas_neg = tf.zeros([proposal_neg_num, 4], dtype=tf.float32)
gt_cls_neg = tf.zeros([proposal_neg_num], dtype=tf.int32)

# 合并正负样本
proposals = tf.concat([proposals_pos, proposals_neg], axis=0)
deltas = tf.concat([deltas_pos, deltas_neg], axis=0)
class_ids = tf.concat([gt_cls_pos, gt_cls_neg], axis=0)
#     # 计算padding
#     class_ids, train_rois = tf_utils.pad_list_to_fixed_size(
#         [tf.expand_dims(class_ids, axis=1), train_rois], train_rois_per_image)  # class_ids分类扩一维
#     # 为后续处理方便负样本tag设置为-1
#     deltas = tf_utils.pad_to_fixed_size_with_negative(deltas, train_rois_per_image, negative_num=negative_num)
print("正、负样本合并随机采样对应的回归目标：\n{}".format(deltas.eval(session=sess)))
print("正、负样本合并随机采样对应的分类目标：\n{}".format(class_ids.eval(session=sess)))
print("正、负样本合并随机采样的proposals框：\n{}".format(proposals.eval(session=sess)))

正、负样本合并随机采样对应的回归目标：
[[-2.3142858  -0.28368792 -0.7707533  -0.6022309 ]
 [ 0.          0.          0.          0.        ]]
正、负样本合并随机采样对应的分类目标：
[2 0]
正、负样本合并随机采样的proposals框：
[[269.  19. 360. 296.]]
