diff --git a/paddle/operators/detection_output_op.h b/paddle/operators/detection_output_op.h index 86285b748a7fe..2b267fd8d130c 100644 --- a/paddle/operators/detection_output_op.h +++ b/paddle/operators/detection_output_op.h @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/operators/math/math_function.h" #include "paddle/operators/math/softmax.h" #include "paddle/operators/strided_memcpy.h" + namespace paddle { namespace operators { template @@ -47,98 +48,94 @@ inline void transpose_fun(const framework::ExecutionContext& context, offset += in_p_tensor_transpose.dims()[4] * src_stride[4]; } } + +template +inline void decode_bboxer( + std::vector>>& all_de_bboxes, + size_t num_p, const T* p_data, T* loc_data, size_t batch_size) { + for (size_t n = 0; n < batch_size; ++n) { + std::vector> decoded_bboxes; + for (size_t i = 0; i < num_p; ++i) { + size_t p_offset = i * 8; + size_t loc_pred_offset = n * num_p * 4 + i * 4; + std::vector> prior_bbox_vec; + math::GetBBoxFromPriorData(p_data + p_offset, 1, prior_bbox_vec); + std::vector> prior_bbox_var; + math::GetBBoxVarFromPriorData(p_data + p_offset, 1, prior_bbox_var); + std::vector loc_pred_data; + for (size_t j = 0; j < 4; ++j) + loc_pred_data.push_back(*(loc_data + loc_pred_offset + j)); + math::BBox bbox = math::DecodeBBoxWithVar( + prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data); + decoded_bboxes.push_back(bbox); + } + all_de_bboxes.push_back(decoded_bboxes); + } +} + template class DetectionOutputKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const framework::Tensor* in_loc = context.Input("Loc"); const framework::Tensor* in_conf = context.Input("Conf"); - const framework::Tensor* in_priorbox = + const framework::Tensor* in_pb = context.Input("PriorBox"); auto* out = context.Output("Out"); - int num_classes = context.template Attr("num_classes"); + int classes = context.template Attr("num_classes"); int top_k = context.template Attr("top_k"); int nms_top_k = context.template Attr("nms_top_k"); - int background_label_id = context.template Attr("background_label_id"); + int label_id = context.template Attr("background_label_id"); float nms_threshold = context.template Attr("nms_threshold"); - float confidence_threshold = - context.template Attr("confidence_threshold"); + float conf_th = context.template Attr("confidence_threshold"); size_t batch_size = in_conf->dims()[1]; int conf_sum_size = in_conf->numel(); - // for softmax - std::vector conf_shape_softmax_vec( - {conf_sum_size / num_classes, num_classes}); - framework::DDim conf_shape_softmax( - framework::make_ddim(conf_shape_softmax_vec)); - // for knchw => nhwc - std::vector loc_shape_vec({1, in_loc->dims()[1], in_loc->dims()[3], - in_loc->dims()[4], - in_loc->dims()[2] * in_loc->dims()[0]}); - std::vector conf_shape_vec( - {1, in_conf->dims()[1], in_conf->dims()[3], in_conf->dims()[4], - in_conf->dims()[2] * in_conf->dims()[0]}); - framework::DDim loc_shape(framework::make_ddim(loc_shape_vec)); - framework::DDim conf_shape(framework::make_ddim(conf_shape_vec)); - framework::Tensor loc_tensor; - framework::Tensor conf_tensor; - loc_tensor.mutable_data(loc_shape, context.GetPlace()); - conf_tensor.mutable_data(conf_shape, context.GetPlace()); - // for cpu + std::vector softmax_vec({conf_sum_size / classes, classes}); + framework::DDim conf_shape_softmax(framework::make_ddim(softmax_vec)); + std::vector l_vec({1, in_loc->dims()[1], in_loc->dims()[3], + in_loc->dims()[4], + in_loc->dims()[2] * in_loc->dims()[0]}); + std::vector c_vec({1, in_conf->dims()[1], in_conf->dims()[3], + in_conf->dims()[4], + in_conf->dims()[2] * in_conf->dims()[0]}); + framework::DDim loc_shape(framework::make_ddim(l_vec)); + framework::DDim conf_shape(framework::make_ddim(c_vec)); + framework::Tensor loc; + framework::Tensor conf; + loc.mutable_data(loc_shape, context.GetPlace()); + conf.mutable_data(conf_shape, context.GetPlace()); framework::Tensor loc_cpu; framework::Tensor conf_cpu; framework::Tensor priorbox_cpu; - const T* priorbox_data = in_priorbox->data(); - transpose_fun(context, *in_loc, &loc_tensor); - transpose_fun(context, *in_conf, &conf_tensor); - conf_tensor.Resize(conf_shape_softmax); + const T* p_data = in_pb->data(); + transpose_fun(context, *in_loc, &loc); + transpose_fun(context, *in_conf, &conf); + conf.Resize(conf_shape_softmax); math::SoftmaxFunctor()( - context.template device_context(), &conf_tensor, - &conf_tensor); - T* loc_data = loc_tensor.data(); - T* conf_data = conf_tensor.data(); + context.template device_context(), &conf, &conf); + T* loc_data = loc.data(); + T* conf_data = conf.data(); if (platform::is_gpu_place(context.GetPlace())) { - loc_cpu.mutable_data(loc_tensor.dims(), platform::CPUPlace()); - framework::Copy(loc_tensor, platform::CPUPlace(), - context.device_context(), &loc_cpu); + loc_cpu.mutable_data(loc.dims(), platform::CPUPlace()); + framework::Copy(loc, platform::CPUPlace(), context.device_context(), + &loc_cpu); loc_data = loc_cpu.data(); - conf_cpu.mutable_data(conf_tensor.dims(), platform::CPUPlace()); - framework::Copy(conf_tensor, platform::CPUPlace(), - context.device_context(), &conf_cpu); + conf_cpu.mutable_data(conf.dims(), platform::CPUPlace()); + framework::Copy(conf, platform::CPUPlace(), context.device_context(), + &conf_cpu); conf_data = conf_cpu.data(); - priorbox_cpu.mutable_data(in_priorbox->dims(), platform::CPUPlace()); - framework::Copy(*in_priorbox, platform::CPUPlace(), - context.device_context(), &priorbox_cpu); - priorbox_data = priorbox_cpu.data(); - } - // get decode bboxes - size_t num_priors = in_priorbox->numel() / 8; - std::vector>> all_decoded_bboxes; - for (size_t n = 0; n < batch_size; ++n) { - std::vector> decoded_bboxes; - for (size_t i = 0; i < num_priors; ++i) { - size_t prior_offset = i * 8; - size_t loc_pred_offset = n * num_priors * 4 + i * 4; - std::vector> prior_bbox_vec; - math::GetBBoxFromPriorData(priorbox_data + prior_offset, 1, - prior_bbox_vec); - std::vector> prior_bbox_var; - math::GetBBoxVarFromPriorData(priorbox_data + prior_offset, 1, - prior_bbox_var); - std::vector loc_pred_data; - for (size_t j = 0; j < 4; ++j) - loc_pred_data.push_back(*(loc_data + loc_pred_offset + j)); - math::BBox bbox = math::DecodeBBoxWithVar( - prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data); - decoded_bboxes.push_back(bbox); - } - all_decoded_bboxes.push_back(decoded_bboxes); + priorbox_cpu.mutable_data(in_pb->dims(), platform::CPUPlace()); + framework::Copy(*in_pb, platform::CPUPlace(), context.device_context(), + &priorbox_cpu); + p_data = priorbox_cpu.data(); } + size_t num_p = in_pb->numel() / 8; + std::vector>> all_de_bboxes; + decode_bboxer(all_de_bboxes, num_p, p_data, loc_data, batch_size); std::vector>> all_indices; int num_kept = math::GetDetectionIndices( - conf_data, num_priors, num_classes, background_label_id, batch_size, - confidence_threshold, nms_top_k, nms_threshold, top_k, - all_decoded_bboxes, &all_indices); - + conf_data, num_p, classes, label_id, batch_size, conf_th, nms_top_k, + nms_threshold, top_k, all_de_bboxes, &all_indices); if (num_kept <= 0) { std::vector out_shape_vec({0, 0}); framework::DDim out_shape(framework::make_ddim(out_shape_vec)); @@ -154,9 +151,8 @@ class DetectionOutputKernel : public framework::OpKernel { out_cpu.mutable_data(out->dims(), platform::CPUPlace()); out_data = out_cpu.data(); } - math::GetDetectionOutput(conf_data, num_kept, num_priors, num_classes, - batch_size, all_indices, all_decoded_bboxes, - out_data); + math::GetDetectionOutput(conf_data, num_kept, num_p, classes, batch_size, + all_indices, all_de_bboxes, out_data); if (platform::is_gpu_place(context.GetPlace())) { framework::Copy(out_cpu, platform::CUDAPlace(), context.device_context(), out); diff --git a/python/paddle/v2/fluid/tests/test_detection_output_op.py b/python/paddle/v2/fluid/tests/test_detection_output_op.py index 080a9743b0182..1ea6bdde6ce58 100644 --- a/python/paddle/v2/fluid/tests/test_detection_output_op.py +++ b/python/paddle/v2/fluid/tests/test_detection_output_op.py @@ -3,7 +3,7 @@ from op_test import OpTest -class TestUnpoolOp(OpTest): +class TestDetectionOutputOp(OpTest): def setUp(self): self.op_type = "detection_output" self.init_test_case()