In [2]:
# training
import random
import pprint
import time
import os
import gc
import cv2
import numpy as np
import pickle
import tensorflow as tf
from keras import backend as K
from keras.optimizers import Adam
from keras.layers import Input
from keras.models import Model
import config
import data_generators
import losses as losses_fn
import roi_helpers as roi_helpers
from keras.utils import generic_utils
import resnet as nn
from simple_parser import get_data
from props_pic_2nd import props_pic
from Visual import _create_unique_color_float, _create_unique_color_uchar, draw_boxes_and_label_on_image
from anchor_2nd import anchors_generation, sliding_anchors_all, pos_neg_iou, anchor_targets_bbox
from net_design_2nd import stage_2_net_res
import matplotlib as mpl
# Select matplotlib's 'agg' backend before anything else touches matplotlib.
mpl.use('agg')
np.set_printoptions(threshold=np.inf) # print numpy arrays in full instead of a summarized form
np.seterr(divide='ignore', invalid='ignore') # silence numpy's "divide" and "invalid" floating-point warnings

def data_gen_stage_2(result, img_data, sess, X, class_mapping, classes_count, iter_num, num_logistic):
    """
    Build the stage-2 training data from the stage-1 result of one batch (a single image).

    :param result: 2-D array of stage-1 proposals in feature-map coordinates. It is multiplied by 16
                   in place, so the caller's array is modified.
    :param img_data: annotation dict of the image. Reads ``img_data['outer_boxes'][0]`` (the tag box)
                     and ``img_data['bboxes']`` (the digit boxes); each entry provides the keys
                     x1, y1, x2, y2 and class.
    :param sess: session object that is forwarded unchanged to ``props_pic``.
    :param X: 3-D image array; a leading axis is added before it is handed to ``props_pic``.
    :param class_mapping: class mapping; only its length is used here.
    :param classes_count: per-class counts; only its length is used here.
    :param iter_num: current iteration index, used in the file names of the debug images written to disk.
    :param num_logistic: indexable per-class counter; incremented in place for each ground-truth class id
                         found in the crops listed in ``gt_index[0]``.
    :return: ``(np.array(x1), [y1, y2], num_logistic)`` where x1 is ``rs_pic[0]`` (the crops),
             y1 is the classification target and y2 is the regression target.
    """
    # Map the proposal coordinates from the feature map back to the resized image (16x down-sampling).
    # NOTE: this writes into `result` in place.
    result[:, :] = 16 * result[:, :]
    # Tag (outer box) annotation of the stage-1 image.
    x1_tag = img_data['outer_boxes'][0]['x1']
    y1_tag = img_data['outer_boxes'][0]['y1']
    x2_tag = img_data['outer_boxes'][0]['x2']
    y2_tag = img_data['outer_boxes'][0]['y2']
    cls_tag = img_data['outer_boxes'][0]['class']
    # Digit annotations of the stage-1 image, collected column-wise: x1, y1, x2, y2, class.
    annos_list = [[], [], [], [], []]
    for i in range(len(img_data['bboxes'])):
        # Keep only digit boxes whose width and height are both at least 10 pixels; smaller ones are
        # considered unrecognisable and are dropped so they do not hurt the final detection quality.
        if img_data['bboxes'][i]['x2'] - img_data['bboxes'][i]['x1'] >= 10 and img_data['bboxes'][i]['y2'] - \
                img_data['bboxes'][i]['y1'] >= 10:
            annos_list[0].append(img_data['bboxes'][i]['x1'])
            annos_list[1].append(img_data['bboxes'][i]['y1'])
            annos_list[2].append(img_data['bboxes'][i]['x2'])
            annos_list[3].append(img_data['bboxes'][i]['y2'])
            annos_list[4].append(img_data['bboxes'][i]['class'])
    # Stack the five columns into one array with a row per kept annotation: (x1, y1, x2, y2, class).
    annos_np = np.concatenate((np.array(annos_list[0])[np.newaxis, :], np.array(annos_list[1])[np.newaxis, :],
                               np.array(annos_list[2])[np.newaxis, :], np.array(annos_list[3])[np.newaxis, :],
                               np.array(annos_list[4])[np.newaxis, :]), axis=0).T
    # Compute the inputs needed by stage 2. Every argument gets a leading axis of size 1.
    # NOTE(review): the exact meaning of the five return values is defined by props_pic, which is
    # not visible here; below only rs_pic, rs_num_gt_pic and gt_index are used.
    rs_pic, rs_boxes, rs_num_gt_pic, rs_wh, gt_index = props_pic(sess, result[np.newaxis, :, :],
                                                                 [[x1_tag, y1_tag, x2_tag, y2_tag, cls_tag]],
                                                                 annos_np[np.newaxis, :, :], X[np.newaxis, :, :, :])
    # ==============================================================================
    # Generate the stage-2 training data
    # ==============================================================================
    batch_size = len(rs_pic[0])  # number of crops; the original note says 5 crops per step
    base_anchors = anchors_generation(16, [0.5 ** (1.0 / 3.0), 1, 2 ** (1.0 / 3.0)],
                                      [0.5, 0.5 ** (1.0 / 2.0), 1, 2 ** (1.0 / 3.0), 2 ** (1.0 / 2.0), 2])
    all_anchors = sliding_anchors_all((10, 20), (8, 8), base_anchors)
    #================================================================
    # The string literal below is disabled test code kept for reference; it is a bare expression
    # statement and nothing in it is executed.
    '''
    # 测试部分：计算当前anchor与当前gt-box的iou，以及框住gt的proposals生成的anchors是否覆盖了所有的小gt
    from overlap_2nd import overlap
    if gt_index != [[]]:
        for gt in rs_num_gt_pic[0][gt_index[0][0]]:
            num_width.append(gt[2]-gt[0])
            num_height.append(gt[3]-gt[1])
    
        # 打印Top-5的最高IOU值
        for anchor in all_anchors:
            for gt in  rs_num_gt_pic[0][gt_index[0][0]]:
                if overlap(anchor[np.newaxis, :], gt[np.newaxis, :])[0][0] > max:
                    max = overlap(anchor[np.newaxis, :], gt[np.newaxis, :])[0][0]
        print("\nTop-{}最高IOU值：{}\n".format(gt_index[0][0]+1,max))
    '''

    # print("\nwidth mean: {}, height mean: {}, width max: {}, height max: {}, width min: {}, height min: {}, h/w ratio max: {}, h/w ratio min: {}".format(mean_width,mean_height,max_width,max_height,min_width,min_height,max_ratio,min_ratio))
    # print(np.array(rs_num_gt_pic[0]))
    # ================================================================
    labels_batch, regression_batch, boxes_batch, inds, pos_inds = anchor_targets_bbox(all_anchors, rs_pic[0],
                                                                                      rs_num_gt_pic[0],
                                                                                      len(class_mapping) - 1)

    # Test/diagnostic part: for every crop listed in gt_index[0], count its ground-truth classes and
    # write an image with that crop's positive anchors drawn on it.
    # NOTE: this writes a PNG into ./anchors_test/ on every call that has such crops.
    for i, num_gt in enumerate(rs_num_gt_pic[0]):
        if i in gt_index[0]:
            for num in rs_num_gt_pic[0][i][:,4]:
                num_logistic[int(num)] += 1
            draw_imgs = draw_boxes_and_label_on_image(rs_pic[0][i],
                                                      {1: all_anchors[pos_inds[i]]}) # , {1: num_gt}
            cv2.imwrite('./anchors_test/Pic{}_Prop{}.png'.format(iter_num, i),
                        draw_imgs)

    #============================================================
    #============================================================
    x1 = rs_pic[0]  # the crops; converted to a numpy array in the return statement
    # Y1 = [labels_batch, regression_batch]

    # Separate the anchors that take part in the loss and extract the non-background anchor indices
    # (the monitoring appends below are disabled).
    # ===========rpn_accuracy_rpn_monitor.append(len(inds[0]))
    # ===========rpn_accuracy_for_epoch.append(len(inds[0]))

    # Targets for the classification network
    # y1 target data
    # Subtract 1 from the 0/1 flag and take the absolute value: this turns the last column of
    # labels_batch from a foreground/background flag into a "bg" class flag.
    labels_batch[inds, -1] = np.abs(labels_batch[inds, -1] - 1)
    # labels_batch[:, :, -1] = np.abs(labels_batch[:, :, -1]) # would also turn the neutral samples into bg
    # Array that separates positive/negative samples from neutral (ignored) ones:
    # 1 where the anchor's last label column is not -1, otherwise 0.
    # NOTE(review): the loops below use len(inds) as the batch count and len(inds[0]) as the anchor
    # count, while tmp is allocated with batch_size rows; this presumably relies on inds having one
    # entry per crop and one element per anchor. Confirm against anchor_targets_bbox.
    tmp = np.zeros((batch_size, len(inds[0]), 1))
    for batch in range(len(inds)):
        for i in range(len(inds[0])):
            a = np.zeros(1)
            # print(labels_batch[:, inds[0], -1])
            if labels_batch[:, :, -1][batch][i] != -1:  # not an ignored sample
                a = 1
            tmp[batch][i] = a
    # y1 = [valid-sample flag | labels_batch] along the last axis.
    y1 = np.concatenate([tmp, labels_batch], axis=2)
    # y2 target data: 4 regression values per non-background class, filled only for positive anchors.
    tmp = np.zeros((batch_size, len(inds[0]), 4 * (len(classes_count) - 1)))
    for batch in range(len(inds)):
        for i in range(len(inds[0])):
            a = np.zeros(4 * (len(classes_count) - 1))
            # print(labels_batch[:, inds[0], -1])
            if labels_batch[:, :, -1][batch][i] == 0:  # positive sample
                # Index of the class whose label is 1 for this positive sample.
                label_index = list(labels_batch[:, :, :(len(classes_count) - 1)][batch][i]).index(1)
                # Put the regression target of the positive sample into the slot of that class.
                a[4 * label_index: 4 * label_index + 4] = regression_batch[:, :, :4][batch][i]
            tmp[batch][i] = a

    # Combine: each class label repeated 4 times (one per coordinate), followed by the regression targets.
    y2 = np.concatenate([np.repeat(labels_batch[:, :, :(len(classes_count) - 1)], 4, axis=2), tmp], axis=2)
    # Drop the large intermediates explicitly and force a collection before returning.
    del x1_tag,y1_tag,x2_tag,y2_tag,cls_tag,annos_list,annos_np,rs_pic,rs_boxes,rs_num_gt_pic,rs_wh,gt_index,labels_batch,regression_batch,boxes_batch,inds,pos_inds,tmp
    gc.collect()
    return np.array(x1), [y1,y2], num_logistic


def train():
    """
    Train the stage-1 RPN and the stage-2 classifier together, one image per iteration.

    Reads the annotations named by ``cfg.simple_label_file``, pickles the configuration to
    ``cfg.config_save_file``, builds and compiles both models, then runs ``num_epochs`` epochs of
    ``epoch_length`` iterations. At the end of each epoch the mean losses are printed (when
    ``cfg.verbose``) and, if the summed loss improved, the classifier weights are saved to
    ``cfg.model_path``. Returns nothing.
    """
    cfg = config.Config()

    # The next 3 lines set the parameters used for data augmentation.
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True

    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())
    # cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'img_infos.txt'

    # Read the image annotation data.
    print('======== 读取图片信息 ========')
    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)
    # Add the background class if it is missing.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping

    print('class_count:')
    pprint.pprint(classes_count)
    print('类别数量：{}'.format(len(classes_count)))

    # # Shuffle the image order
    # random.shuffle(all_images)

    # Split into training and test sets by the 'imageset' field.
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('训练集数量：{}'.format(len(train_imgs)))
    print('测试集数量：{}'.format(len(val_imgs)))

    # Save the configuration so that prediction can reload it.
    print('\n======== 保存配置 ========')
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print('配置参数已写入：{},'.format(cfg.config_save_file))

    # Training data generator.
    data_gen_train = data_generators.get_anchor_gt(train_imgs, cfg, nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')
    # data_gen_val = data_generators.get_anchor_gt(val_imgs, cfg, nn.get_img_output_length,
    #                                              K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    small_img_input = Input(shape=(160, 80, 3)) # height 160, width 80

    # Base network
    shared_layers = nn.nn_base(img_input, trainable=True)

    # RPN
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)  # scales x ratios (noted as 9 by the original author)
    rpn = nn.rpn(shared_layers, num_anchors)

    # Output of the stage-2 classification network
    classifier = stage_2_net_res(len(classes_count), small_img_input, height=160, width=80)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model(small_img_input, classifier)

    # model_all = Model([img_input, small_img_input], rpn[:2] + classifier)

    # Load the pre-trained weights.
    print('\n======== 加载预训练模型参数 ========')
    try:
        # NOTE(review): the message prints cfg.base_net_weights, but the weights actually loaded come
        # from cfg.base_rpn_model_path and cfg.base_tf_model_path.
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.base_rpn_model_path, by_name=True)
        model_classifier.load_weights(cfg.base_tf_model_path, by_name=True)
    except Exception as e:
        # Best effort: a load failure is reported and training continues with the current weights.
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')

    # Compile the models.
    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-3)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)],
                      metrics=['accuracy'])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1)],
                             metrics=['accuracy'])
    # model_all.compile(optimizer='sgd', loss='mae')

    # Training parameters.
    epoch_length = 910
    num_epochs = 200
    losses = np.zeros((epoch_length, 5))  # only columns 0-3 are written below
    # NOTE(review): nothing in this function appends to the two lists below, so the "overlapping
    # boxes" report inside the loop is never triggered while epoch_length is non-zero.
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf
    iter_num = 0
    # Work-around for tensorflow grabbing all GPU memory at start-up (currently disabled):
    # config_tf = tf.ConfigProto()
    # config_tf.gpu_options.allow_growth = True
    # NOTE(review): this session is never closed in this function.
    sess = tf.Session() # config=config_tf
    num_logistic = [0,0,0,0,0,0,0,0,0,0]  # per-class counter updated by data_gen_stage_2

    print('\n======== 开始训练 ========')
    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        n = 1  # incremented every iteration but never read
        while True:
            #try:
                # After a full epoch, average the epoch_length collected rpn_accuracy values and
                # report them; a mean of 0 is reported as a likely problem.
                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                            mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap'
                              ' the ground truth boxes. Check RPN settings or keep training.')

                # X: resized image   Y: anchor labels and regression targets   img_data: original image info
                # X_2: image passed on to the stage-2 data generation.
                X, Y, img_data, X_2 = next(data_gen_train)
                # Train the stage-1 RPN on this image.
                loss_rpn = model_rpn.train_on_batch(X, Y)
                # Predict score and regression for every anchor; per the original note P_rpn[0] has
                # shape (1,m,n,9) and P_rpn[1] has shape (1,m,n,36).
                P_rpn = model_rpn.predict_on_batch(X)
                # Build the proposals (rois) on the feature map, keeping at most 5 boxes
                # (the original note says they are sorted by predicted score, descending).
                result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=5)
                # Train the stage-2 classifier on the crops derived from those proposals.
                x, y, num_logistic = data_gen_stage_2(result, img_data, sess, X_2, class_mapping, classes_count, iter_num, num_logistic)
                loss_class = model_classifier.train_on_batch(x, y)

                # Record the losses (index 0 of each result is skipped; 1 and 2 are stored).
                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]

                # Update the progress bar with the running means over this epoch so far.
                iter_num += 1
                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),('detector_regr', np.mean(losses[:iter_num, 3]))])
                n += 1
                # At the end of an epoch, compute the mean of each loss component.
                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])

                    #=======mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    #=======rpn_accuracy_for_epoch = []

                    # Print the epoch summary.
                    if cfg.verbose:
                        #==========print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
                               #===========mean_overlapping_bboxes))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    # Total loss of the whole epoch: sum of the four mean components.
                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()
                    # The string literal below is disabled plotting code kept for reference; it is
                    # a bare expression statement and nothing in it is executed.
                    '''
                    # 1个epoch所有图片proposals区域，正样本anchors的高、宽趋势可视化
                    x = range(len(num_width))
                    y1 = num_width
                    y2 = num_height
                    plt.plot(x, y1, marker='o', color='r')
                    plt.plot(x, y2, marker='*', color='b')
                    # plt.show()
                    # plt.savefig('plot.png',format='png')
                    '''
                    # Save the weights when this epoch has the lowest total loss so far.
                    # NOTE(review): only model_classifier is saved here; model_rpn's weights are not.
                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_classifier.save_weights(cfg.model_path)

                    # Leave the while-loop to start the next epoch.
                    break
    print(num_logistic)
'''
            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
'''

# Script entry point of the training cell: run the whole training loop, then report completion.
if __name__ == '__main__':
    train()
    print('Training complete, exiting.')


class_count:
{'0': 24,
 '1': 10,
 '2': 21,
 '3': 102,
 '4': 20,
 '5': 20,
 '6': 30,
 '7': 35,
 '8': 76,
 '9': 36,
 'bg': 0}
类别数量：11
训练集数量：120
测试集数量：0

配置参数已写入：config.pickle,

loading weights from ./model/resnet50_weights_tf_dim_ordering_tf_kernels.h5

Epoch 1/10
Loss RPN classifier: 0.07375852511487821
Loss RPN regression: 0.03092568395562315
Loss Detector classifier: 0.5778397411108017
Loss Detector regression: 0.07608750903358062
Elapsed time: 148.6282205581665
Total loss decreased from inf to 0.7586114592148837, saving weights
Epoch 2/10
Loss RPN classifier: 0.03794629643358928
Loss RPN regression: 0.02462094008794035
Loss Detector classifier: 0.5122654919823011
Loss Detector regression: 0.10171730006113648
Elapsed time: 141.74072194099426
Total loss decreased from 0.7586114592148837 to 0.6765500285649672, saving weights
Epoch 3/10
Loss RPN classifier: 0.03764123749128161
Loss RPN regression: 0.02011318124399016
Loss Detector classifier: 0.49059534122546516
Loss Detector regression:

In [8]:
# prediction
import cv2
import numpy as np
import pickle
from keras import backend as K
from keras.layers import Input
from keras.models import Model
import roi_helpers
import argparse
import os
import resnet as nn
from visualize import draw_boxes_and_label_on_image
from net_design_2nd import stage_2_net_vgg
from anchor_2nd import anchors_generation, sliding_anchors_all
from PIL import Image
from keras.preprocessing.image import img_to_array


def format_img_size(img, cfg):
    """
    Resize an image so that its shorter side equals ``cfg.im_size``.

    :param img: 3-D image array (height, width, channels).
    :param cfg: configuration object providing ``im_size``.
    :return: ``(resized_image, ratio)`` where ratio is the scale factor that was applied.
    """
    target_side = float(cfg.im_size)
    rows, cols, _channels = img.shape

    # Scale by whichever side is shorter; the other side keeps the aspect ratio (truncated to int).
    if cols <= rows:
        ratio = target_side / cols
        out_size = (int(target_side), int(ratio * rows))
    else:
        ratio = target_side / rows
        out_size = (int(ratio * cols), int(target_side))

    # cv2.resize takes the size as (width, height).
    resized = cv2.resize(img, out_size, interpolation=cv2.INTER_CUBIC)
    return resized, ratio


def format_img_channels(img, cfg):
    """
    Normalize an image for the network and add a leading batch axis.

    The image is copied to float32, the per-channel mean from ``cfg.img_channel_mean`` is
    subtracted from the first three channels, the result is divided by
    ``cfg.img_scaling_factor``, and an axis of size 1 is inserted in front. The channel
    order and the channels-last layout are left unchanged.

    :param img: image array with the channels on the last axis.
    :param cfg: configuration object providing ``img_channel_mean`` and ``img_scaling_factor``.
    :return: float32 array of shape ``(1,) + img.shape``.
    """
    pixels = img.astype(np.float32)

    for channel in range(3):
        pixels[:, :, channel] -= cfg.img_channel_mean[channel]

    pixels /= cfg.img_scaling_factor
    return np.expand_dims(pixels, axis=0)


def format_img(img, C):
    """
    Prepare an image for model prediction according to the config.

    Only the channel normalization is applied; the image is not resized, so the
    returned scale ratio is always 1.

    :param img: image array to prepare.
    :param C: configuration object passed on to ``format_img_channels``.
    :return: ``(prepared_image, ratio)``.
    """
    return format_img_channels(img, C), 1


def get_real_coordinates(ratio, x1, y1, x2, y2):
    """
    Map box coordinates from the resized image back to the original image.

    Each coordinate is divided by ``ratio`` and rounded to the nearest integer.

    :param ratio: scale factor that was applied to the original image (resized = original * ratio).
    :param x1: left coordinate in the resized image.
    :param y1: top coordinate in the resized image.
    :param x2: right coordinate in the resized image.
    :param y2: bottom coordinate in the resized image.
    :return: tuple ``(real_x1, real_y1, real_x2, real_y2)`` of ints in original-image pixels.
    """
    # Use true division: the previous floor division (`//`) truncated before round() ran, which
    # made round() a no-op and biased every coordinate downward whenever ratio != 1.
    real_x1 = int(round(x1 / ratio))
    real_y1 = int(round(y1 / ratio))
    real_x2 = int(round(x2 / ratio))
    real_y2 = int(round(y2 / ratio))

    return real_x1, real_y1, real_x2, real_y2


def predict_single_image(img_path, model_rpn, model_classifier, cfg, class_mapping):
    """
    Run the two-stage detector on one image and save the annotated result.

    Stage 1 produces up to 5 proposals with the RPN; each proposal is cropped from the image,
    resized to 80x160 and classified per anchor by the stage-2 model. The surviving boxes are
    mapped back to the full image, printed, drawn, and written to ``./result_images/``.

    :param img_path: path of the image
    :param model_rpn: RPN model
    :param model_classifier: stage-2 detection model
    :param cfg: configuration object
    :param class_mapping: mapping from class index to class name
    :return: None
    """
    img = cv2.imread(img_path)  # read the image

    if img is None:
        # NOTE(review): this terminates the whole process with status 0, which also ends a
        # directory run started from predict().
        print('reading image failed.')
        exit(0)

    # print(class_mapping)

    X, ratio = format_img(img, cfg)  # pre-process the image (channel normalization, batch axis)
    # The string literal below is a disabled layout conversion; it is never executed.
    '''
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))
    '''
    # Classification scores and regression parameters of all anchors.
    P_rpn = model_rpn.predict_on_batch(X)

    # Proposals (rois), at most 5.
    proposals = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=5)

    rpn_outputs = []  # holds the proposals

    # `score` is a descending rank (5, 4, 3, ...) attached to each proposal in order; it is not a
    # model probability even though it is printed as "prob" below.
    score = 5
    # Map the proposal coordinates from the feature map back to the input image.
    for proposal in proposals:
        rpn_outputs.append(
            [cfg.rpn_stride * proposal[0], cfg.rpn_stride * proposal[1],
             cfg.rpn_stride * proposal[2], cfg.rpn_stride * proposal[3], score])
        score -= 1

    # for box in rpn_outputs:
    # nms
    boxes_nms = roi_helpers.non_max_suppression_fast(rpn_outputs, overlap_thresh=0.8)
    rpn_outputs = boxes_nms
    print("【RPN outputs】:")
    for b in boxes_nms:
        # Map the coordinates back to the original image.
        b[0], b[1], b[2], b[3] = get_real_coordinates(ratio, b[0], b[1], b[2], b[3])
        print('coordinate:{} prob: {}'.format(b[0: 4], b[-1]))

    # Crop the rois from the original image and record each crop's offset and scale ratios.
    image = Image.fromarray(img.astype('uint8'))
    resized_width = 80
    resized_height = 160
    imgs_crop = []  # cropped and resized images
    crop_scales = []  # top-left corner (x, y) of each roi in the original image
    crop_ratios = []  # (width, height) resize ratio of each roi
    for roi in rpn_outputs:
        w = roi[2] - roi[0]
        h = roi[3] - roi[1]
        # NOTE(review): a roi with zero width or height would divide by zero here.
        ratio_w = resized_width / w
        ratio_h = resized_height / h
        crop_ratios.append([ratio_w, ratio_h])
        crop_scales.append([round(roi[0]), round(roi[1])])
        prop_crop = image.crop([roi[0], roi[1], roi[2], roi[3]])
        prop_crop = img_to_array(prop_crop)
        prop_pic = cv2.resize(prop_crop, (resized_width, resized_height))
        imgs_crop.append(prop_pic)

    # Same anchor layout as used when the stage-2 training data is generated.
    base_anchors = anchors_generation(16, [0.5 ** (1.0 / 3.0), 1, 2 ** (1.0 / 3.0)],
                                      [0.5, 0.5 ** (1.0 / 2.0), 1, 2 ** (1.0 / 3.0), 2 ** (1.0 / 2.0), 2])
    all_anchors = sliding_anchors_all((10, 20), (8, 8), base_anchors)

    final_boxes = {}  # class index -> list of [x1, y1, x2, y2, prob] in full-image coordinates
    for i, img_crop in enumerate(imgs_crop):
        p_cls, p_regr = model_classifier.predict_on_batch(img_crop[np.newaxis, :, :, :])
        boxes = {}
        bbox_threshold = 0.9
        # Visit every anchor.
        for ii in range(p_cls.shape[1]):
            # Skip the anchor when its best class probability is below the threshold, or when the
            # best class is the background (the last class index).
            if np.max(p_cls[0, ii, :]) < bbox_threshold or np.argmax(p_cls[0, ii, :]) == (p_cls.shape[2] - 1):
                continue
            cls_num = np.argmax(p_cls[0, ii, :])  # index of the most probable class
            if cls_num not in boxes.keys():
                boxes[cls_num] = []

            # Bounding-box regression.
            # NOTE(review): the regressed x1, y1, x2, y2 are computed but not used; the raw anchor
            # coordinates are appended instead, and the regressed variant is commented out below.
            x1, y1, x2, y2 = regr_revise(all_anchors[ii, :], p_regr[0, ii, 4*cls_num: 4*(cls_num+1)])
            boxes[cls_num].append([all_anchors[ii, 0], all_anchors[ii, 1],
                                   all_anchors[ii, 2], all_anchors[ii, 3], np.max(p_cls[0, ii, :])])
#             boxes[cls_num].append([x1, y1, x2, y2, np.max(p_cls[0, ii, :])])

            # print('================>')
            # print('coordinates before regression: {}'.format(all_anchors[ii, :]))
            # print('regression parameters: {}'.format(p_regr[0, ii, 4*cls_num: 4*(cls_num+1)]))
            # print('coordinates after regression: {}'.format([x1, y1, x2, y2]))
            # print('<================')

        for cls_num, box in boxes.items():
            # nms, keeping at most one box per class for this crop
            boxes_nms = roi_helpers.non_max_suppression_fast(box, overlap_thresh=0.3, max_boxes=1)
            boxes[cls_num] = boxes_nms
            for b in boxes_nms:
                # Map the coordinates from the resized crop back to the un-resized crop.
                b[0] = round(b[0] / crop_ratios[i][0])
                b[1] = round(b[1] / crop_ratios[i][1])
                b[2] = round(b[2] / crop_ratios[i][0])
                b[3] = round(b[3] / crop_ratios[i][1])
                # Shift by the crop's top-left corner to get full-image coordinates.
                b[0] += crop_scales[i][0]
                b[1] += crop_scales[i][1]
                b[2] += crop_scales[i][0]
                b[3] += crop_scales[i][1]

                print('【{}】'.format(class_mapping[cls_num]))
                print('coordinate:{} prob: {}'.format(b[0: 4], b[-1]))
                if cls_num not in final_boxes.keys():
                    final_boxes[cls_num] = []
                final_boxes[cls_num].append([b[0], b[1], b[2], b[3], b[4]])

    # Draw the boxes and save the image under the source file's base name.
    img = draw_boxes_and_label_on_image(img, class_mapping, final_boxes)
    result_path = './result_images/{}.png'.format(os.path.basename(img_path).split('.')[0])
    print('result saved into ', result_path)
    cv2.imwrite(result_path, img)


def regr_revise(anchor, regr):
    """
    Apply regression offsets (dx, dy, dw, dh) to an anchor box.

    The centre is shifted by dx/dy times the anchor's width/height, and the width/height
    are scaled by exp(dw)/exp(dh).

    :param anchor: box as (x1, y1, x2, y2).
    :param regr: offsets as (dx, dy, dw, dh).
    :return: tuple (x1, y1, x2, y2) of the corrected box.
    """
    anchor_w = anchor[2] - anchor[0]
    anchor_h = anchor[3] - anchor[1]

    # Corrected centre.
    center_x = regr[0] * anchor_w + (anchor[2] + anchor[0]) / 2.0
    center_y = regr[1] * anchor_h + (anchor[3] + anchor[1]) / 2.0

    # Half of the corrected width and height.
    half_w = np.exp(regr[2]) * anchor_w / 2.0
    half_h = np.exp(regr[3]) * anchor_h / 2.0

    return center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h


def predict(args_):
    """
    Run detection on one image file, or on every image inside a directory.

    Loads the pickled configuration from ``config.pickle``, rebuilds the RPN and the stage-2
    classifier, loads their trained weights, and calls ``predict_single_image`` for each image.

    :param args_: path of an image file or of a directory containing images.
    :return: None
    """
    target = args_  # image path

    # Restore the configuration object from disk.
    with open('config.pickle', 'rb') as cfg_file:
        cfg = pickle.load(cfg_file)
    # cfg.use_horizontal_flips = False
    # cfg.use_vertical_flips = False
    # cfg.rot_90 = False

    # Ensure the background class exists, then invert name -> index into index -> name.
    name_to_index = cfg.class_mapping
    if 'bg' not in name_to_index:
        name_to_index['bg'] = len(name_to_index)
    class_mapping = {index: name for name, index in name_to_index.items()}

    # The layers below are created in the same order as before; weights are loaded by layer name.
    img_input = Input(shape=(None, None, 3))

    # Base network
    shared_layers = nn.nn_base(img_input, trainable=True)

    # RPN
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    # Stage-2 detection network
    small_img_input = Input(shape=(160, 80, 3))
    classifier = stage_2_net_vgg(len(class_mapping), small_img_input)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier = Model(small_img_input, classifier)

    # Trained weights
    model_rpn.load_weights('model_trained/model_final_1st.hdf5', by_name=True)
    model_classifier.load_weights('model_trained/model_final.hdf5', by_name=True)

    # Compile both models before use.
    for model in (model_rpn, model_classifier):
        model.compile(optimizer='sgd', loss='mse')

    if os.path.isdir(target):
        image_suffixes = ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')
        # Process the directory entries in sorted order, skipping anything that is not an image.
        for img_name in sorted(os.listdir(target)):
            if img_name.lower().endswith(image_suffixes):
                print(img_name)
                predict_single_image(os.path.join(target, img_name), model_rpn,
                                     model_classifier, cfg, class_mapping)
        return

    if os.path.isfile(target):
        print('predict image from {}'.format(target))
        predict_single_image(target, model_rpn, model_classifier, cfg, class_mapping)


def parse_args():
    """
    Parse the command-line options of the prediction script.

    :return: namespace whose ``path`` attribute is the image file or directory to process
             (default: ``./00020_annotated_num_120/images``).
    """
    cli = argparse.ArgumentParser()
    # e.g. 00020_annotated_num/images/aug_3_012.png
    cli.add_argument(
        '--path', '-p',
        default='./00020_annotated_num_120/images',
        help='image path',
    )
    return cli.parse_args()


# Script entry point of the prediction cell. Command-line parsing is disabled here; a fixed
# image path is passed to predict() instead.
if __name__ == '__main__':
    # args = parse_args()
    predict('./00020_annotated_num_120/images/aug_3_012.png')


predict image from ./00020_annotated_num_120/images/aug_3_012.png
【RPN outputs】:
coordinate:[1040.  400. 1056.  448.] prob: 5.0
coordinate:[1360.  592. 1376.  608.] prob: 4.0
coordinate:[1360.  608. 1376.  624.] prob: 3.0
coordinate:[1456.  752. 1472.  784.] prob: 2.0
coordinate:[1792.  640. 1808.  672.] prob: 1.0
【8】
coordinate:[1050.  416. 1057.  425.] prob: 0.9999703168869019
【0】
coordinate:[1048.  439. 1053.  450.] prob: 0.9998008608818054
【9】
coordinate:[1051.  399. 1053.  404.] prob: 0.9718182682991028
【2】
coordinate:[1047.  441. 1051.  447.] prob: 0.9999532699584961
【7】
coordinate:[1046.  446. 1048.  448.] prob: 0.998893678188324
【4】
coordinate:[1048.  417. 1050.  419.] prob: 0.9982838034629822
【6】
coordinate:[1051.  437. 1056.  442.] prob: 0.9876574277877808
【3】
coordinate:[1040.  445. 1042.  448.] prob: 0.9884258508682251
【0】
coordinate:[1369.  605. 1375.  609.] prob: 0.9996733665466309
【8】
coordinate:[1359.  605. 1366.  608.] prob: 0.9999905824661255
【9】
coordinate:[1371.  59