In [30]:
import keras.layers as KL
from keras.models import Model
import keras.backend as K
import tensorflow as tf
import numpy as np
import keras
import voc_data
import importlib
import config
import math
from keras.callbacks import LearningRateScheduler
from keras.utils.vis_utils import plot_model

import proposal_func
import detection_target_fixed
import classifier_fixed
import utils

# Reload the project-local modules so that edits made to them on disk are picked
# up when this cell is re-run, without restarting the kernel.
importlib.reload(voc_data)
importlib.reload(config)
importlib.reload(proposal_func)
importlib.reload(detection_target_fixed)
importlib.reload(classifier_fixed)
importlib.reload(utils)

# Number of entries in voc_data.classes_arr.
classes_num = len(voc_data.classes_arr)
# NOTE: from this line on, `config` names the Config *instance*, not the module.
# The `import config` at the top of this cell rebinds the module on a re-run.
config = config.Config()
# Short alias for the classifier-head builder defined in classifier_fixed.
fpn_classifiler = classifier_fixed.fpn_classifiler
anchor_num = len(config.scales) * len(config.ratios) # number of anchors generated per anchor point

In [31]:
class BatchNorm(KL.BatchNormalization):
    """Batch normalization layer that always runs in inference mode.

    The `training` argument is accepted for signature compatibility but is
    ignored: the parent layer is always called with `training=False`.
    """

    def call(self, inputs, training=None):
        """Apply batch normalization to `inputs` with `training=False`."""
        # Name the class explicitly. `super(self.__class__, self)` resolves to the
        # subclass itself as soon as BatchNorm is subclassed, so the call would
        # recurse into this method forever.
        return super(BatchNorm, self).call(inputs, training=False)

# Bottleneck building block of the ResNet backbone.
# filters: channel count of the first two convolutions (the last one outputs 4 * filters)
# block:   index of the block; block 0 uses stride 2 and a projected shortcut
def building_block(filters, block):
    """Return a function that applies one bottleneck block to a tensor.

    The main branch is 1x1 conv -> 3x3 conv -> 1x1 conv, each followed by
    BatchNorm, with ReLU after the first two. The result is added to the
    shortcut branch and passed through a final ReLU.
    """
    is_first_block = (block == 0)
    # Only block 0 downsamples; every other block keeps the spatial size.
    stride = 2 if is_first_block else 1

    def f(x):
        main = KL.Conv2D(filters, (1, 1), strides=stride)(x)
        main = BatchNorm(axis=3)(main)
        main = KL.Activation("relu")(main)

        main = KL.Conv2D(filters, (3, 3), padding="same")(main)
        main = BatchNorm(axis=3)(main)
        main = KL.Activation("relu")(main)

        main = KL.Conv2D(4 * filters, (1, 1))(main)
        main = BatchNorm(axis=3)(main)

        if is_first_block:
            # Project the input so the shortcut has the same channel count
            # (and stride) as the main branch.
            residual = KL.Conv2D(4 * filters, (1, 1), strides=stride)(x)
            residual = BatchNorm(axis=3)(residual)
        else:
            residual = x

        # Merge the two branches.
        merged = KL.Add()([main, residual])
        return KL.Activation("relu")(merged)

    return f

# ResNet backbone
def resNet_featureExtractor(inputs):
    """Build the ResNet-style feature extractor and return its output tensor.

    A 2x2 stride-2 stem convolution is followed by three stages of 3, 4 and 6
    bottleneck blocks. The base channel count starts at 64 and doubles after
    every stage.
    """
    width = 64   # channel count of the stem convolution and of the first stage
    x = KL.Conv2D(width, (2, 2), strides=2)(inputs)
    x = BatchNorm(axis=3)(x)
    x = KL.Activation("relu")(x)

    # Number of building blocks per stage.
    stage_depths = [3, 4, 6]

    for depth in stage_depths:
        for block_id in range(depth):
            x = building_block(width, block_id)(x)
        width *= 2
    return x

def rpn_net(inputs, k):
    """Build the region proposal network heads on top of a feature map.

    Args:
        inputs: feature-map tensor.
        k: number of anchors per feature-map location.

    Returns:
        (rpn_class, rpn_prob, rpn_bbox): class logits reshaped to
        [batch, -1, 2], their softmax, and box outputs reshaped to
        [batch, -1, 4].
    """
    # Shared 3x3 convolution feeding both heads.
    trunk = KL.Conv2D(256, (3, 3), padding="same")(inputs)
    trunk = KL.Activation("linear")(trunk)

    # Classification head: two logits per anchor.
    class_logits = KL.Conv2D(2 * k, (1, 1))(trunk)
    class_logits = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(class_logits)
    class_logits = KL.Activation("linear")(class_logits)
    # Class scores as probabilities.
    class_probs = KL.Activation("softmax")(class_logits)

    # Regression head: four box values per anchor.
    box_deltas = KL.Conv2D(4 * k, (1, 1))(trunk)
    box_deltas = KL.Activation("linear")(box_deltas)
    box_deltas = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(box_deltas)

    return class_logits, class_probs, box_deltas


In [32]:
# Build a stand-alone backbone + RPN model just to inspect the layer shapes.
# The input is (height, width, channels): use image_size[1] for the width so this
# cell agrees with fasterRCNN.build for non-square image sizes.
x = KL.Input((config.image_size[0], config.image_size[1], 3))
fp = resNet_featureExtractor(x)
rpn_class, rpn_prob, rpn_bbox = rpn_net(fp, anchor_num)
model = Model([x], [rpn_class, rpn_prob, rpn_bbox])
model.summary()
# plot_model(model, to_file="model/model.png", show_shapes=True)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_28 (InputLayer)           (None, 320, 320, 3)  0                                            
__________________________________________________________________________________________________
conv2d_331 (Conv2D)             (None, 160, 160, 64) 832         input_28[0][0]                   
__________________________________________________________________________________________________
batch_norm_310 (BatchNorm)      (None, 160, 160, 64) 256         conv2d_331[0][0]                 
__________________________________________________________________________________________________
activation_321 (Activation)     (None, 160, 160, 64) 0           batch_norm_310[0][0]             
__________________________________________________________________________________________________
conv2d_332

In [33]:
# Small helper: keep the leading `counts[i]` rows of every batch item of `x`
# and concatenate them.
def batch_back(x, counts, num_rows):
    """Concatenate the first `counts[i]` rows of `x[i]` for i in range(num_rows).

    Args:
        x: batched tensor, indexed as x[i, :n].
        counts: per-item number of leading rows to keep.
        num_rows: number of batch items to process.
    """
    return tf.concat([x[row, :counts[row]] for row in range(num_rows)], axis=0)

# RPN classification loss
# rpn_match: ground-truth anchor labels; rpn_class_logits: predicted logits
def rpn_class_loss(rpn_match, rpn_class_logits):
    """Cross-entropy between anchor labels and RPN class logits.

    Anchors labelled 0 in `rpn_match` are ignored. Among the remaining ones,
    label 1 becomes class 1 and every other label becomes class 0. Returns the
    mean loss, or 0.0 when no anchor contributes.
    """
    # Drop the trailing singleton axis, e.g. (None, 576, 1) => (None, 576).
    match_flat = tf.squeeze(rpn_match, -1)
    # 1 stays 1; -1 and 0 become 0.
    binary_labels = K.cast(K.equal(match_flat, 1), tf.int32)
    # Positions of the anchors labelled -1 or 1.
    contributing = tf.where(K.not_equal(match_flat, 0))
    # Predictions and targets at those positions.
    picked_logits = tf.gather_nd(rpn_class_logits, contributing)
    picked_labels = tf.gather_nd(binary_labels, contributing)
    per_anchor = K.sparse_categorical_crossentropy(target=picked_labels, output=picked_logits, from_logits=True)
    # Average when there is something to average, otherwise return 0.
    return K.switch(tf.size(per_anchor) > 0, K.mean(per_anchor), tf.constant(0.0))

# RPN box-regression loss
# target_bbox: ground-truth box offsets, rpn_match: ground-truth anchor labels,
# rpn_bbox: predicted box offsets
def rpn_bbox_loss(target_bbox, rpn_match, rpn_bbox):
    """Smooth-L1 loss between target and predicted offsets of positive anchors.

    Only anchors labelled 1 in `rpn_match` contribute. The matching targets are
    taken from the leading rows of each batch item of `target_bbox`. Returns
    the mean loss, or 0.0 when there is no positive anchor.
    """
    # Drop the trailing singleton axis, e.g. (None, 576, 1) => (None, 576).
    rpn_match = tf.squeeze(rpn_match, -1)
    is_positive = K.equal(rpn_match, 1)
    # Predicted offsets of the positive anchors.
    rpn_bbox = tf.gather_nd(rpn_bbox, tf.where(is_positive))
    # Number of positive anchors in every batch item.
    batch_counts = K.sum(K.cast(is_positive, tf.int32), axis=1)
    # Target offsets: the first batch_counts[i] rows of each batch item.
    target_bbox = batch_back(target_bbox, batch_counts, config.batch_size)
    # Reuse the shared Smooth-L1 helper instead of duplicating the formula here.
    loss = smooth_l1_loss(y_true=target_bbox, y_pred=rpn_bbox)
    # Average when there is something to average, otherwise return 0.
    loss = K.switch(tf.size(loss) > 0 , K.mean(loss), tf.constant(0.0))
    return loss

# Smooth-L1 loss (element-wise)
def smooth_l1_loss(y_true, y_pred):
    """Element-wise Smooth-L1 loss between `y_true` and `y_pred`.

    For |d| < 1 the loss is 0.5 * d**2 (quadratic); otherwise it is |d| - 0.5
    (linear, so large errors are penalised less sharply). No reduction is
    applied.
    """
    abs_err = K.abs(y_true - y_pred)
    # 1.0 where the quadratic branch applies, 0.0 elsewhere.
    quad_mask = K.cast(K.less(abs_err, 1.0), "float32")
    quadratic = 0.5 * abs_err**2
    linear = abs_err - 0.5
    return quad_mask * quadratic + (1 - quad_mask) * linear

# Box-regression loss of the classifier head
def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
    """Smooth-L1 loss on the box outputs of positive ROIs.

    Only ROIs whose target class id is > 0 contribute, and for each of them
    only the prediction belonging to its target class is compared with the
    target. Returns the mean loss reshaped to [1, 1].
    """
    # Merge the batch and ROI axes.
    class_ids_flat = K.reshape(target_class_ids, (-1,))
    targets_flat = K.reshape(target_bbox, (-1, 4))
    preds_per_class = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4))

    # Rows of the positive ROIs and the class each of them belongs to.
    positive_rows = tf.where(class_ids_flat > 0)[:, 0]
    positive_classes = tf.cast(tf.gather(class_ids_flat, positive_rows), tf.int64)
    row_class_pairs = tf.stack([positive_rows, positive_classes], axis=1)

    # Targets and class-specific predictions of the positive ROIs.
    positive_targets = tf.gather(targets_flat, positive_rows)
    positive_preds = tf.gather_nd(preds_per_class, row_class_pairs)

    per_coord = K.switch(tf.size(positive_targets) > 0,
                         smooth_l1_loss(y_true=positive_targets, y_pred=positive_preds),
                         tf.constant(0.0))
    return K.reshape(K.mean(per_coord), [1, 1])

# Classification loss of the classifier head
def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, active_class_ids):
    """Cross-entropy over ROIs whose predicted class is marked active.

    Args:
        target_class_ids: target class id of every ROI.
        pred_class_logits: class logits; argmax is taken over axis 2.
        active_class_ids: per-image flags indexed by class id. Only the first
            image's flags are used (see TODO below).

    Returns:
        Sum of the masked losses divided by the number of active predictions;
        0 when no prediction is active.
    """
    target_class_ids = tf.cast(target_class_ids, 'int64')
    # Find predictions of classes that are not in the dataset.
    pred_class_ids = tf.argmax(pred_class_logits, axis=2)
    # TODO: Update this line to work with batch > 1. Right now it assumes all
    #       images in a batch have the same active_class_ids
    pred_active = tf.gather(active_class_ids[0], pred_class_ids)

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=target_class_ids, logits=pred_class_logits)

    # Zero out the loss of predictions that fall on inactive classes.
    pred_active = tf.cast(pred_active, tf.float32)
    loss = loss * pred_active

    # Clamp the denominator so a batch with no active prediction yields 0
    # instead of 0/0 = NaN. With 0/1 flags the sum is a whole number, so the
    # clamp only takes effect when the sum is zero.
    active_count = tf.maximum(tf.reduce_sum(pred_active), K.epsilon())
    loss = tf.reduce_sum(loss) / active_count
    return loss

# Classification loss of the classifier head, v2: per-image active class flags
def mrcnn_class_loss_graphV2(target_class_ids, pred_class_logits, active_class_ids, batch_size=config.batch_size):
    """Cross-entropy over ROIs whose predicted class is marked active.

    Unlike `mrcnn_class_loss_graph`, the active flags are looked up per image:
    `utils.batch_slice` gathers image i's flags with image i's predicted ids.

    Args:
        target_class_ids: target class id of every ROI.
        pred_class_logits: class logits; argmax is taken over axis 2.
        active_class_ids: per-image flags indexed by class id.
        batch_size: number of images handed to `utils.batch_slice`.

    Returns:
        Sum of the masked losses divided by the number of active predictions;
        0 when no prediction is active.
    """
    target_class_ids = tf.cast(target_class_ids, 'int64')
    pred_class_ids = tf.argmax(pred_class_logits, axis=2)
    # For every image, look up whether each predicted class is active.
    pred_active = utils.batch_slice([active_class_ids, pred_class_ids], lambda x,y:tf.gather(x,y), batch_size)

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=target_class_ids, logits=pred_class_logits)
    # Zero out the loss of predictions that fall on inactive classes.
    pred_active = tf.cast(pred_active, tf.float32)
    loss = loss * pred_active
    # Clamp the denominator so a batch with no active prediction yields 0
    # instead of 0/0 = NaN. With 0/1 flags the sum is a whole number, so the
    # clamp only takes effect when the sum is zero.
    active_count = tf.maximum(tf.reduce_sum(pred_active), K.epsilon())
    loss = tf.reduce_sum(loss) / active_count
    return loss

In [34]:
# Final refinement layer, used when the model is run for inference
import keras.engine as KE

def refine_detections(rois, probs, deltas, num_rois=16, min_confidence=0.5):
    """Turn the classifier outputs for one image into a padded detection list.

    Each ROI is refined with the deltas of its highest-scoring class. ROIs whose
    best score is above `min_confidence` are kept, and the result is zero-padded
    to `num_rois` rows.

    Args:
        rois: ROIs of one image, one row per ROI.
        probs: class probabilities, indexed [roi, class].
        deltas: box deltas, indexed [roi, class, coordinate].
        num_rois: number of ROIs per image; also the padded row count. Must
            match the row count of `probs`. Defaults to 16, the value this
            function used to hard-code.
        min_confidence: score threshold for keeping a detection (default 0.5,
            previously hard-coded).

    Returns:
        Tensor whose rows are the refined box followed by the class id (as
        float), padded with zero rows.
    """
    # Best class and its score for every ROI.
    class_ids = tf.argmax(probs, axis=1)
    class_scores = tf.reduce_max(probs, axis=1)
    keep_idxs = tf.where(class_scores > min_confidence)[:, 0]

    # (row, best class) pairs select one set of deltas per ROI.
    row_idxs = tf.cast(np.arange(num_rois), tf.int32)
    idxs = tf.stack([row_idxs, tf.cast(class_ids, tf.int32)], axis=1)
    deltas_keep = tf.gather_nd(deltas, idxs)

    # Scale the deltas by config.RPN_BBOX_STD_DEV before applying them.
    refined_rois = proposal_func.anchor_refinement(tf.cast(rois, tf.float32),
                                 tf.cast(deltas_keep * config.RPN_BBOX_STD_DEV, tf.float32))

    rois_ready = tf.gather(refined_rois, keep_idxs)
    kept_class_ids = tf.gather(class_ids, keep_idxs)
    # tf.cast replaces the deprecated tf.to_float; the result is the same.
    kept_class_ids = tf.cast(kept_class_ids, tf.float32)[..., tf.newaxis]
    detections = tf.concat([rois_ready, kept_class_ids], axis=1)

    # Zero-pad so every image yields the same number of rows.
    gap = tf.maximum(num_rois - tf.shape(detections)[0], 0)
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections

class DetectionLayer(KE.Layer):
    """Keras layer that applies `refine_detections` to every image of a batch.

    Inputs (a list): [rois, probs, deltas]. The output stacks the padded
    per-image detections returned by `refine_detections`.
    """

    def call(self, inputs):
        rois = inputs[0]
        probs = inputs[1]
        deltas = inputs[2]

        # Run the per-image refinement on each of the config.batch_size items.
        detections_batch = utils.batch_slice(
            [rois, probs, deltas],
            refine_detections,
            config.batch_size)
        return detections_batch

    def compute_output_shape(self, input_shape):
        # The previous (None, 8, -1) was not a valid static shape. Each image
        # yields one row per ROI (refine_detections pads up to the ROI count).
        # A row is the refined box plus one class-id column; the width of 5
        # assumes 4-coordinate boxes -- TODO confirm against
        # proposal_func.anchor_refinement.
        rois_shape = input_shape[0]
        return (None, rois_shape[1], 5)

In [35]:
## Faster R-CNN model wrapper
## mode:   "training" or "inference"
## subnet: "rpn" (RPN only) or "all" (RPN + classifier head)
class fasterRCNN():
    """Wrapper that builds, trains and runs the Faster R-CNN Keras model.

    Args:
        mode: "training" or "inference".
        subnet: "rpn" (RPN outputs and losses only) or "all" (RPN plus the
            classifier head). Only consulted in training mode.
        config: configuration object (image_size, batch_size, ...).
    """

    def __init__(self, mode, subnet, config):
        assert mode in ["training", "inference"]
        self.mode = mode
        self.config = config
        self.subnet = subnet
        # Set by compile_() so the losses are registered only once.
        self._compiled = False
        self.keras_model = self.build(mode=mode, subnet=subnet, config=config)

    def build(self, mode, subnet, config):
        """Build and return the Keras model for the given mode and subnet.

        Raises:
            ValueError: in training mode, if `subnet` is neither "rpn" nor "all".
        """
        assert mode in ["training", "inference"]
        # Fail early with a clear message; an unknown subnet used to surface as
        # an UnboundLocalError on `model` at the end of this method.
        if mode == "training" and subnet not in ("rpn", "all"):
            raise ValueError('subnet must be "rpn" or "all" in training mode, got %r' % (subnet,))

        # Image input
        input_image = KL.Input(shape=[config.image_size[0], config.image_size[1], 3], dtype=tf.float32)
        # Ground-truth boxes
        input_bboxes = KL.Input(shape=[None,4], dtype=tf.float32)
        # Ground-truth class ids
        input_class_ids = KL.Input(shape=[None],dtype=tf.int32)
        # Per-image active-class flags, one per class
        input_active_ids = KL.Input(shape=[classes_num,], dtype=tf.int32)
        # Ground-truth anchor labels (-1, 0, 1)
        input_rpn_match = KL.Input(shape=[None, 1], dtype=tf.int32)
        # Ground-truth anchor offsets
        input_rpn_bbox = KL.Input(shape=[None, 4], dtype=tf.float32)
        # Image height and width
        h, w = config.image_size[: 2]
        # Divide the ground-truth boxes by [h, w, h, w] to normalise them.
        image_scale = K.cast(K.stack([h,w,h,w], axis=0), tf.float32)
        gt_bboxes = KL.Lambda(lambda x: x / image_scale)(input_bboxes)

        # Backbone and RPN
        feature_map = resNet_featureExtractor(input_image)
        rpn_class, rpn_prob, rpn_bbox = rpn_net(feature_map, anchor_num)
        # Proposal layer. NOTE: proposal_count=16 must stay in sync with the
        # default num_rois of refine_detections.
        proposals = proposal_func.proposal(proposal_count=16, nms_thresh=0.7, anchors=voc_data._anchors, batch_size=config.batch_size, config=config)([rpn_prob, rpn_bbox])

        if mode == "training":
            target_rois, target_class_ids, target_delta, target_bboxes = detection_target_fixed.DetectionTarget(config=config, \
                              name="proposal_target")([proposals,input_class_ids,gt_bboxes])

            # ROIs are multiplied by 8.0 before being handed to the classifier head.
            denormalized_rois = KL.Lambda(lambda x: 8.0*x, name="denormalized_rois")(target_rois)
            loss_rpn_match = KL.Lambda(lambda x: rpn_class_loss(*x), name="loss_rpn_match")([input_rpn_match, rpn_class])
            loss_rpn_bbox = KL.Lambda(lambda x: rpn_bbox_loss(*x), name="loss_rpn_bbox")([input_rpn_bbox, input_rpn_match, rpn_bbox])

            model_inputs = [input_image, input_bboxes, input_class_ids, input_active_ids, input_rpn_match, input_rpn_bbox]

            if subnet == "rpn":
                # RPN only
                model = Model(model_inputs,
                              [feature_map, rpn_class, rpn_prob, rpn_bbox, proposals, target_rois, denormalized_rois, target_class_ids, \
                               target_delta, target_bboxes, loss_rpn_match, loss_rpn_bbox])
            else:
                # RPN + classifier head (subnet == "all")
                # NOTE(review): the literals 21 and 7 are passed through unchanged;
                # their meaning is defined by classifier_fixed.fpn_classifiler.
                mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifiler(feature_map, denormalized_rois, config.batch_size, 21, 7, classes_num)
                bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="bbox_loss")(
                                                [target_delta, target_class_ids, mrcnn_bbox])
                class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graphV2(*x), name="mrcnn_class_loss")(
                                        [target_class_ids, mrcnn_class_logits, input_active_ids])

                model = Model(model_inputs,
                [feature_map, rpn_class, rpn_prob, rpn_bbox, proposals, target_rois, denormalized_rois,target_class_ids, target_delta, \
                 target_bboxes, mrcnn_class_logits, mrcnn_class, mrcnn_bbox, loss_rpn_match, loss_rpn_bbox, bbox_loss, class_loss])
                model.summary()
        else:
            # Inference mode: `subnet` is not consulted here.
            denormalized_proposals = KL.Lambda(lambda x:8.0*x, name="denormalized_proposals")(proposals)
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifiler(feature_map, denormalized_proposals, config.batch_size, 21, 7, classes_num)
            detections = DetectionLayer()([proposals, mrcnn_class, mrcnn_bbox])
            # Inference takes only the image and returns the detected boxes.
            model = Model([input_image],[detections])

        return model

    # Compile
    def compile_(self):
        """Register the loss layers with the Keras model and compile it.

        Calling it again is a no-op. Without that guard, every call re-added the
        same loss tensors and metric names, so a second training() run
        optimised a doubled loss.
        """
        if getattr(self, "_compiled", False):
            return

        loss_names = ["loss_rpn_match", "loss_rpn_bbox"]
        if self.subnet == "all":
            loss_names += ["bbox_loss", "mrcnn_class_loss"]
        loss_outputs = [self.keras_model.get_layer(name).output for name in loss_names]

        # Add the loss layers to the model's total loss.
        for loss_output in loss_outputs:
            self.keras_model.add_loss(tf.reduce_mean(loss_output))

        # lr=0.0 is a placeholder: training() sets the rate every epoch via
        # LearningRateScheduler.
        self.keras_model.compile(loss=[None]*len(self.keras_model.output), optimizer=keras.optimizers.SGD(lr=0.0, momentum=0.9, decay=0.0, nesterov=False))

        # Report every individual loss as a metric to follow its convergence.
        for name, loss_output in zip(loss_names, loss_outputs):
            self.keras_model.metrics_names.append(name)
            self.keras_model.metrics_tensors.append(tf.reduce_mean(loss_output, keep_dims=True))

        self._compiled = True

    # Training
    def training(self, dataGen, steps_per_epoch=20, epochs=20):
        """Compile the model and fit it on the batches produced by `dataGen`.

        Args:
            dataGen: generator yielding (inputs, targets) batches.
            steps_per_epoch: batches drawn per epoch (default 20, as before).
            epochs: number of epochs (default 20, as before).

        Returns:
            The Keras History object returned by fit_generator.
        """
        assert self.mode == "training"
        self.compile_()

        def step_decay(epoch):
            # Start at 1e-4 and multiply by 0.8 every 5 epochs.
            initial_lrate = 0.0001
            drop = 0.8
            epochs_drop = 5.0
            lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop))
            print('学习率：' + str(lrate))
            return lrate

        lrate = LearningRateScheduler(step_decay)
        return self.keras_model.fit_generator(dataGen, steps_per_epoch=steps_per_epoch, epochs=epochs, callbacks=[lrate])

    # Inference
    def inference(self, testdata):
        """Run the inference model on `testdata` and return its predictions."""
        assert self.mode == "inference"
        out = self.keras_model.predict(testdata)
        return out

    # Save the weights
    def save_weights(self, weights_path):
        """Save the model weights to `weights_path`."""
        self.keras_model.save_weights(weights_path)

    # Load the weights
    def load_weights(self, weights_path):
        """Load weights from the HDF5 file `weights_path`, matching layers by name.

        NOTE(review): layers are matched by name and most layers here get
        auto-generated names (e.g. conv2d_331), which change between builds in
        the same kernel. Verify that the names of this model match those stored
        in the file, otherwise layers may be skipped.
        """
        from keras.engine import topology
        import h5py
        # Open read-only and close the handle when done. The file used to be
        # opened with h5py's default mode and was never closed.
        with h5py.File(weights_path, mode="r") as f:
            layers = self.keras_model.layers
            topology.load_weights_from_hdf5_group_by_name(f, layers)

In [36]:
# VOC batch generator
def data_Gen(num_batch, batch_size, config, beginIndex, dataset_size=16000):
    """Yield `num_batch` training batches read through voc_data.getAllImage.

    Args:
        num_batch: number of batches to yield before the generator ends.
        batch_size: samples per batch.
        config: configuration object; max_gt_obj and image_size are read.
        beginIndex: index of the first sample.
        dataset_size: the sample index wraps to 0 on reaching this value
            (default 16000, the value that used to be hard-coded).

    Yields:
        ([images, bboxes, class_ids, active_ids, rpn_matchs, rpn_bboxes], [])
        with every array batched along axis 0. Boxes and class ids are
        zero-padded (or truncated) to config.max_gt_obj rows per sample.
    """
    print("----------------")
    index = beginIndex
    for _batch in range(num_batch):
        images = []
        bboxes = []
        class_ids = []
        rpn_matchs = []
        rpn_bboxes = []
        active_ids = []
        for _sample in range(batch_size):
            image, bbox, class_id, active_id, rpn_match, rpn_bbox, _idx_len, _anchors = voc_data.getAllImage(index)
            # Keep at most max_gt_obj objects. A sample with more objects used
            # to produce a negative pad count and crash np.zeros.
            bbox = bbox[:config.max_gt_obj]
            class_id = class_id[:config.max_gt_obj]
            # Zero-pad so every sample has exactly max_gt_obj rows.
            pad_num = config.max_gt_obj - bbox.shape[0]
            bbox = np.concatenate([bbox, np.zeros((pad_num, 4))], axis=0)
            class_id = np.concatenate([class_id, np.zeros((pad_num, 1))], axis=0)

            images.append(image)
            bboxes.append(bbox)
            class_ids.append(class_id)
            rpn_matchs.append(rpn_match)
            rpn_bboxes.append(rpn_bbox)
            active_ids.append(active_id)
            index += 1
            # Wrap around at the end of the dataset.
            if index >= dataset_size:
                index = 0

        print("数据：" + str(index))
        images = np.concatenate(images, 0).reshape(batch_size, config.image_size[0],config.image_size[1] , 3)
        bboxes = np.concatenate(bboxes, 0).reshape(batch_size, -1 , 4)
        class_ids = np.concatenate(class_ids, 0).reshape(batch_size, -1 )
        rpn_matchs = np.concatenate(rpn_matchs, 0).reshape(batch_size, -1 , 1)
        rpn_bboxes = np.concatenate(rpn_bboxes, 0).reshape(batch_size, -1 , 4)
        active_ids = np.concatenate(active_ids, 0).reshape(batch_size, -1 )
        # The targets list is empty: the model's losses are computed inside the graph.
        yield [images, bboxes, class_ids, active_ids, rpn_matchs, rpn_bboxes],[]

# Index of the first sample to read
beginIndex = 0
# Generator that yields up to 200000 batches of config.batch_size samples
dataGen = data_Gen(
    num_batch=200000,
    batch_size=config.batch_size,
    config=config,
    beginIndex=beginIndex,
)

In [37]:
# Build the full training model (RPN + classifier head). This rebinds `model`,
# which previously held the stand-alone RPN Keras model, to the fasterRCNN wrapper.
model = fasterRCNN(mode="training", subnet="all", config=config)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_29 (InputLayer)           (None, 320, 320, 3)  0                                            
__________________________________________________________________________________________________
conv2d_377 (Conv2D)             (None, 160, 160, 64) 832         input_29[0][0]                   
__________________________________________________________________________________________________
batch_norm_353 (BatchNorm)      (None, 160, 160, 64) 256         conv2d_377[0][0]                 
__________________________________________________________________________________________________
activation_365 (Activation)     (None, 160, 160, 64) 0           batch_norm_353[0][0]             
__________________________________________________________________________________________________
conv2d_378

In [38]:
# face V2 version
# 13.2M parameters, 320 x 320 input, batch_size: 15, steps_per_epoch: 20, rpn_stride: 16, scales: [40, 50, 65, 85, 110, 140, 190, 250], buildblock: [7,8,7], file name: model_320_16_[number of epochs]_v3.h5
# First run: 20 epochs, lr=0.0001, batch_size: 10, 10k samples
# drop = 0.6: decay rate turned down
# learning rate schedule
# NOTE(review): these notes appear to describe an earlier experiment. In this
# notebook the block counts are [3, 4, 6] and step_decay uses drop = 0.8.
model.training(dataGen)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


----------------Epoch 1/20

学习率：0.0001


UnboundLocalError: local variable 'active_ids' referenced before assignment

In [None]:
# Write the current model weights to disk through fasterRCNN.save_weights.
model.save_weights("model/model_320_16_v2_500.h5")
# model.save_weights("model/model_320_16_700.h5")

In [None]:
# Rebuild the network in inference mode and load the saved weights into it.
# In inference mode build() does not consult `subnet`; the classifier head is
# always included.
model = fasterRCNN(mode="inference", subnet="rpn", config=config)
model.load_weights("model/model_320_16_v2_500.h5")

In [None]:
# Pull the next batch from the generator. test_data[0] is the list of model
# inputs and test_data[0][0] holds the images.
test_data = next(dataGen)

In [None]:
# Run the inference model on the images of the batch.
out = model.inference(test_data[0][0])

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
%matplotlib inline

# Pick one random image of the batch.
ix = random.sample(range(config.batch_size), 1)[0]

image = test_data[0][0][ix]

# First four columns: boxes, scaled by the image size; fifth column: class id.
boxes_result = out[ix][:, :4] * config.image_size[0]
id_result = out[ix][:, 4]

plt.imshow(image)
Axs = plt.gca()

# Draw only the detections whose class id is > 0.
pos_idxs = np.where(id_result > 0)[0]

for id_ in pos_idxs:
    box = boxes_result[id_]
    rec = patches.Rectangle(
        xy=(box[0], box[1]),
        width=box[2] - box[0],
        height=box[3] - box[1],
        edgecolor="r",
        facecolor="none",
    )
    Axs.add_patch(rec)