# 预测

In [1]:
from __future__ import division
import os
import cv2
import numpy as np
import sys
import pickle
from keras_frcnn import config
from keras import backend as K
from keras.layers import Input
from keras.models import Model
from keras_frcnn import network_aux_roi
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
from matplotlib import pyplot as plt
import keras_frcnn.network_model_vgg as nn
from keras_frcnn.result_display import Display
from keras_frcnn.data_process import Process
#GPU占用设置
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
con = tf.ConfigProto()
con.gpu_options.per_process_gpu_memory_fraction = 0.4
set_session(tf.Session(config=con))
sys.setrecursionlimit(40000)

#预测数据路径
img_path = './test/'
num_rois=32
config_output_filename = 'config.pickle'
with open(config_output_filename, 'rb') as f_in:
    C = pickle.load(f_in)

Using TensorFlow backend.


## 图像预处理

In [2]:
# 读取图片
filepath = 'test/fe402625-3f99-3755-84b5-9b9ca9696706.jpg'
img = cv2.imread(filepath)
img_process = Process(img, C) 
X, ratio = img_process.format_img()

## 网络结构
以VGG为例


VGG

shared_layers = nn.nn_base(img_input, trainable=True)   

RPN

rpn_layers = nn.rpn(shared_layers, num_anchors)   


R-CNN

classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)   

VGG+RPN

model_rpn = Model(img_input, rpn_layers)     

VGG+Fast R-CNN

model_classifier = Model([feature_map_input, roi_input], classifier)   




In [3]:
#class_mapping中是所有类别的名称
class_mapping = C.class_mapping
class_mapping = {v: k for k, v in class_mapping.items()}
K.set_learning_phase(0)

# 输入
num_features = 512
input_shape_img = (None, None, 3)
input_shape_features = (None, None, num_features)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# 基础网络
shared_layers = nn.nn_base(img_input, trainable=True)

# RPN
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

model_rpn = Model(img_input, rpn_layers)
model_classifier = Model([feature_map_input, roi_input], classifier)

#加载权值
print('Loading weights from {}'.format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)
model_classifier.load_weights(C.model_path, by_name=True)

Loading weights from ./model_frcnn.hdf5


## 预测
### 1. 提取候选区域

In [4]:
bbox_threshold = 0.8

classes = {}
# 得到RPN的输出（类别分数，偏移值）以及特征图
[Y1, Y2, F] = model_rpn.predict(X)
# 处理RPN预测出来的坐标，[tx,ty,tw,th]->[x1, y1, x2, y2]（相对原图坐标），去掉出界的框,nms
R = network_aux_roi.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)
# R.shape (300, 4) ，假设一共预测出300个box
    
# (x1,y1,x2,y2) -> (x,y,w,h)
R[:, 2] -= R[:, 0]
R[:, 3] -= R[:, 1]

kwargs passed to function are ignored with Tensorflow backend


### 2. 在候选区域上预测

In [5]:
bboxes = {}
probs = {}

#我们按顺序选择提取其中32个作为roi进一步处理
for jk in range(R.shape[0]//C.num_rois + 1):
    ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
    if ROIs.shape[1] == 0:
        break
    # 如果最后一批数量不足32，补0
    if jk == R.shape[0]//C.num_rois:
        curr_shape = ROIs.shape
        target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
        ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
        ROIs_padded[:, :curr_shape[1], :] = ROIs
        ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
        ROIs = ROIs_padded
        
    #假设roi数量为32，预测类别和回归值(1, 32, 2),(1, 32, 4)，输入F特征图以及32个ROIs
    [P_cls, P_regr] = model_classifier.predict([F, ROIs])
    for ii in range(P_cls.shape[1]):
        # 如果类别分数最大的都小于阈值或者最后一个背景类是分数最大的box,跳出循环
        if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
            continue
        # 得到分数最高类别的名称
        cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
        # 如果类别还没有在bboxes中，初始化此类坐标和概率列表
        if cls_name not in bboxes:
            bboxes[cls_name] = []
            probs[cls_name] = []
        # 得到ROI的坐标
        (x, y, w, h) = ROIs[0, ii, :]
        # 得到分数最大的类别的偏移值，通过计算得到最终坐标
        cls_num = np.argmax(P_cls[0, ii, :])
        try:
            (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
            tx /= C.classifier_regr_std[0]
            ty /= C.classifier_regr_std[1]
            tw /= C.classifier_regr_std[2]
            th /= C.classifier_regr_std[3]
            x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
        except:
            pass
        # 将类别坐标以及概率加入列表
        bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
        probs[cls_name].append(np.max(P_cls[0, ii, :]))

### 3.输出
整理检测目标的类别，坐标，概率，并画出预测框

In [6]:
display = Display(bboxes,probs,ratio)
# 打印结果
display.result()
# 画框
display.draw(C,img)

[('hat', [230, 575, 604, 835], 99.995064735412598), ('hat', [287, 345, 489, 489], 99.963974952697754), ('hat', [575, 287, 719, 374], 99.842333793640137), ('hat', [259, 230, 374, 316], 99.760580062866211), ('hat', [950, 345, 1151, 489], 99.675744771957397), ('hat', [431, 115, 518, 172], 99.542361497879028), ('hat', [0, 0, 86, 86], 89.802491664886475)]
