# train脚本 搭建比较检测器并进行训练

### 设置训练参数 和 主干网络

In [None]:
# Main model function; it is shared by the training, evaluation and prediction modes.
def model_fn(features,
             labels,
             mode,
             params,
             config):
    """Build the detector graph for a ``tf.estimator`` style model function.

    Only the opening of the function (configuration parsing and the shared
    backbone) is documented here; the remaining stages follow in the code
    below.

    Args:
        features: mapping read for the keys ``"image"`` and ``"image_window"``.
        labels: ground-truth inputs; not read in this opening section.
        mode: compared against ``tf.estimator.ModeKeys.TRAIN``.
        params: mapping whose ``"net_config"`` entry is the network configuration.
        config: not read in this opening section.
    """

    # ***********************************************************************************************
    # *                                         share net                                           *
    # ***********************************************************************************************
    # Pull the network configuration out of the estimator params.
    net_config = params["net_config"]
    # True only when the estimator is running in TRAIN mode.
    IS_TRAINING = mode == tf.estimator.ModeKeys.TRAIN

    # Raw image batch and its image window, as delivered by the input pipeline.
    origin_image_batch = features["image"]
    image_window = features["image_window"]
    # Preprocessing: subtract the configured pixel means from the raw images.
    image_batch = origin_image_batch - net_config.PIXEL_MEANS

    # Shared backbone network. NOTE: per the original author's remark, `is_training`
    # here decides whether batch normalization runs in training mode, so the
    # hard-coded False below matters (it does not follow IS_TRAINING).
    _, share_net = get_network_byname(inputs=image_batch,
                                      config=net_config,
                                      is_training=False,
                                      reuse=tf.AUTO_REUSE)

### 搭建FPN金字塔池化网络 提取特征

In [None]:
    # ***********************************************************************************************
    # *                                            fpn                                              *
    # ***********************************************************************************************
    # Build the feature pyramid from the shared backbone output `share_net`.
    # The result is later consumed by the RPN and by the Fast R-CNN head.
    feature_pyramid = build_fpn.build_feature_pyramid(share_net, net_config)

### 搭建RPN区域建议网络 将FPN的输出作为输入
得到位置和分类（是否是物体）损失

In [1]:
    # ***********************************************************************************************
    # *                                            rpn                                              *
    # ***********************************************************************************************
    # Ground-truth boxes and labels for the batch (looked up with `.get`, unlike the
    # minibatch entries below, which are indexed directly).
    gtboxes_and_label_batch = labels.get("gt_box_labels")
    # Region proposal network built on top of the feature pyramid.
    rpn = build_rpn.RPN(feature_pyramid=feature_pyramid,
                        image_window=image_window,
                        config=net_config)

    # rpn_proposals_scores==(2000,)  (original author's shape note; TODO confirm against build_rpn)
    rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals(IS_TRAINING)
    # RPN localization and classification (object / not object) losses, computed from
    # the pre-sampled minibatch entries in `labels`.
    # NOTE(review): these keys are indexed unconditionally; this presumably requires
    # `labels` to be populated in every mode that reaches this point — confirm.
    rpn_location_loss, rpn_classification_loss = rpn.rpn_losses(labels["minibatch_indices"],
                                                                labels["minibatch_encode_gtboxes"],
                                                                labels["minibatch_objects_one_hot"])
        
    # Total RPN loss: unweighted sum of the classification and localization terms.
    rpn_total_loss = rpn_classification_loss + rpn_location_loss

IndentationError: unexpected indent (1555849508.py, line 4)

### 提取参考图像的特征 为后续模型提供参考图像的特征

In [None]:
    # ***********************************************************************************************
    # *                                        Reference image                                      *
    # ***********************************************************************************************
    # Extract features from the reference (support) images; they are handed to the
    # detection head as `total_refernece_feature` (identifier spelling kept as-is
    # because later code refers to it by that name).

    # Load the reference images and cast them to float32.
    reference_image = load_reference_image()
    reference_image = tf.cast(reference_image, tf.float32)
    # Same preprocessing as the input batch: subtract the configured pixel means.
    reference_image = reference_image - net_config.PIXEL_MEANS
    # Run the reference images through the backbone; tf.AUTO_REUSE is passed so the
    # backbone variables can be shared with the earlier call.
    _, reference_share_net = get_network_byname(inputs=reference_image,
                                                config=net_config,
                                                is_training=False,
                                                reuse=tf.AUTO_REUSE)
    # Feature pyramid of the reference images.
    reference_feature_pyramid = build_fpn.build_feature_pyramid(reference_share_net, net_config)

    # Channel depth used by the reshapes below (hard-coded as 256 in the original).
    # NOTE(review): assumed to equal the FPN output depth — confirm against build_fpn.
    fpn_channels = 256
    num_foreground_classes = net_config.NUM_CLASS - 1
    roi_size = net_config.ROI_SIZE

    # Average the features of the support images.
    # Per level: (C*S, H, W, 256) ----> (C, ROI_SIZE, ROI_SIZE, 256)
    # (C = NUM_CLASS - 1, S = NUM_SUPPROTS; input layout per the original author's note).
    with tf.variable_scope('reference_feature_origision'):
        # Iterate over a snapshot so that the dict is not modified while being iterated.
        for key, level_feature in list(reference_feature_pyramid.items()):
            # Bilinearly resize every level to the ROI size.
            resized = tf.image.resize_bilinear(level_feature, (roi_size, roi_size))
            # Split the leading axis into (class, support) and average over the supports.
            per_support = tf.reshape(resized,
                                     (num_foreground_classes, net_config.NUM_SUPPROTS,
                                      roi_size, roi_size,
                                      fpn_channels))
            reference_feature_pyramid[key] = tf.reduce_mean(per_support, axis=1)

        # Average the per-level features into a single tensor for the whole pyramid.
        average_fpn_feature = list(reference_feature_pyramid.values())
        reference_fpn_features = tf.reduce_mean(tf.stack(average_fpn_feature, axis=0), axis=0)

        # Compute the "negative" reference feature from the per-class (positive) ones.
        with tf.variable_scope("reference_negative"):
            with slim.arg_scope([slim.conv2d],
                                padding="SAME",
                                weights_initializer=tf.glorot_uniform_initializer(),
                                weights_regularizer=slim.l2_regularizer(net_config.WEIGHT_DECAY)):
                # Positive features: (C, H, W, 256) -> (H, W, C, 256) -> (1, H, W, C*256).
                positive_features = tf.reshape(
                    tf.transpose(reference_fpn_features, (1, 2, 0, 3)),
                    (1, roi_size, roi_size, num_foreground_classes * fpn_channels))
                # A 3x3 convolution maps the stacked positive features to one
                # (1, H, W, 256) negative feature.
                negative_feature = slim.conv2d(positive_features, num_outputs=fpn_channels,
                                               kernel_size=[3, 3], stride=1)
                # Negative feature first, then the per-class features: (C + 1, H, W, 256).
                total_refernece_feature = tf.concat([negative_feature, reference_fpn_features], axis=0)


### 构建Fast RCNN检测网络

In [None]:
    # ***********************************************************************************************
    # *                                         Fast RCNN                                           *
    # ***********************************************************************************************

    # Build the Fast R-CNN head from the FPN features, the RPN proposals, the
    # ground-truth labels and the reference features.
    # NOTE(review): `is_training` is hard-coded to False here rather than following
    # IS_TRAINING — confirm this is intended.
    fast_rcnn = build_fast_rcnn.FastRCNN(feature_pyramid=feature_pyramid,
                                         rpn_proposals_boxes=rpn_proposals_boxes,
                                         origin_image=origin_image_batch,
                                         gtboxes_and_label=gtboxes_and_label_batch,
                                         reference_feature=total_refernece_feature,
                                         config=net_config,
                                         is_training=False,
                                         image_window=image_window)
    # Run detection. Judging by the slicing below, the last axis of `detections` holds
    # 4 box coordinates followed by two more columns (presumably category and score,
    # going by the drawing helper's name — TODO confirm).
    detections = fast_rcnn.fast_rcnn_detection()
    if DEBUG:
        # Image summaries for the first image of the batch: the first 50 RPN proposals
        # and the final detections.
        rpn_proposals_vision = draw_boxes_with_scores(origin_image_batch[0, :, :, :],
                                                      rpn_proposals_boxes[0, :50, :],
                                                      rpn_proposals_scores[0, :50])
        fast_rcnn_vision = draw_boxes_with_categories_and_scores(origin_image_batch[0, :, :, :],
                                                                 detections[0, :, :4],
                                                                 detections[0, :, 4],
                                                                 detections[0, :, 5])
        tf.summary.image("rpn_proposals_vision", rpn_proposals_vision) 
        tf.summary.image("fast_rcnn_vision", fast_rcnn_vision)

    # Fast R-CNN losses: localization and classification. In the total, the
    # classification loss is weighted by 5.0 and the localization loss by 1.0.
    fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
    fast_rcnn_total_loss = 5.0*fast_rcnn_classification_loss + fast_rcnn_location_loss


### 训练过程

In [None]:
    # Training hyper-parameters.
    # NOTE(review): these assignments are indented as if they belong to an enclosing
    # scope that is not visible here (presumably a config class) — confirm. The
    # training code in this file reads its settings from `net_config.*`, not from
    # these names.
    EPOCH_BOUNDARY = [35, 50]  # presumably the epochs at which the learning rate is lowered — TODO confirm
    EPOCH = 60  # presumably the total number of training epochs — TODO confirm
    WEIGHT_DECAY = 0.0001  # regularization coefficient
    EPSILON = 1e-5  # small constant; its consumer is not visible here
    MOMENTUM = 0.9  # momentum coefficient
    GPU_GROUPS = ["/gpu:0", "/gpu:1"]  # device strings for two GPUs
    LEARNING_RATE = 0.001  # base learning rate
    PER_GPU_IMAGE = 1  # presumably the number of images per GPU — TODO confirm
    CLIP_GRADIENT_NORM = 5.0  # same value as the literal 5.0 used for gradient clipping in the training step

In [2]:
# train
    with tf.variable_scope("regularization_losses"):
        # Regularization term: for every trainable variable whose name contains neither
        # 'gamma' nor 'beta', add l2_loss(w) * WEIGHT_DECAY divided by its element count.
        regularization_list = []
        for w in tf.trainable_variables():
            if 'gamma' in w.name or 'beta' in w.name:
                continue
            regularization_list.append(
                tf.nn.l2_loss(w.read_value()) * net_config.WEIGHT_DECAY
                / tf.cast(tf.size(w.read_value()), tf.float32))
        regularization_losses = tf.add_n(regularization_list)

    # Total loss of the model: regularization + Fast R-CNN + RPN.
    total_loss = regularization_losses + fast_rcnn_total_loss + rpn_total_loss
    # Step counter that tracks training progress.
    global_step = slim.get_or_create_global_step()
    # Initialize the variables under the backbone scope from the checkpoint.
    backbone_scope = net_config.NET_NAME + "/"
    tf.train.init_from_checkpoint(net_config.CHECKPOINT_DIR, {backbone_scope: backbone_scope})

    # Optimization.
    with tf.variable_scope("optimizer"):
        # Piecewise-constant schedule: the base rate, then 1/10 and 1/100 of it after
        # the two configured step boundaries.
        lr_boundaries = [np.int64(net_config.BOUNDARY[0]), np.int64(net_config.BOUNDARY[1])]
        lr_values = [net_config.LEARNING_RATE,
                     net_config.LEARNING_RATE / 10,
                     net_config.LEARNING_RATE / 100]
        lr = tf.train.piecewise_constant(global_step,
                                         boundaries=lr_boundaries,
                                         values=lr_values)
        # Momentum optimizer wrapped in TowerOptimizer, the contrib wrapper used for
        # multi-GPU (multi-tower) training.
        optimizer = tf.contrib.estimator.TowerOptimizer(
            tf.train.MomentumOptimizer(lr, momentum=net_config.MOMENTUM))
        # Ops registered in the UPDATE_OPS collection must run alongside each training
        # step (e.g. batch-norm moving-average updates).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies([tf.group(*update_ops)]):
            # Gradients of the total loss; each existing gradient is clipped to an L2
            # norm of at most 5.0, and variables without a gradient pass through untouched.
            grads = [
                (g, v) if g is None else (tf.clip_by_norm(g, 5.0), v)
                for g, v in optimizer.compute_gradients(total_loss)
            ]
            # Apply the clipped gradients and advance the global step.
            train_op = optimizer.apply_gradients(grads, global_step)


IndentationError: unexpected indent (3705240355.py, line 2)