Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix detection_output_op.h: a function exceeded 100 lines #7399

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 68 additions & 72 deletions paddle/operators/detection_output_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/softmax.h"
#include "paddle/operators/strided_memcpy.h"

namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
Expand Down Expand Up @@ -47,98 +48,94 @@ inline void transpose_fun(const framework::ExecutionContext& context,
offset += in_p_tensor_transpose.dims()[4] * src_stride[4];
}
}

template <typename T>
inline void decode_bboxer(
std::vector<std::vector<operators::math::BBox<T>>>& all_de_bboxes,
size_t num_p, const T* p_data, T* loc_data, size_t batch_size) {
for (size_t n = 0; n < batch_size; ++n) {
std::vector<operators::math::BBox<T>> decoded_bboxes;
for (size_t i = 0; i < num_p; ++i) {
size_t p_offset = i * 8;
size_t loc_pred_offset = n * num_p * 4 + i * 4;
std::vector<math::BBox<T>> prior_bbox_vec;
math::GetBBoxFromPriorData<T>(p_data + p_offset, 1, prior_bbox_vec);
std::vector<std::vector<T>> prior_bbox_var;
math::GetBBoxVarFromPriorData<T>(p_data + p_offset, 1, prior_bbox_var);
std::vector<T> loc_pred_data;
for (size_t j = 0; j < 4; ++j)
loc_pred_data.push_back(*(loc_data + loc_pred_offset + j));
math::BBox<T> bbox = math::DecodeBBoxWithVar<T>(
prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data);
decoded_bboxes.push_back(bbox);
}
all_de_bboxes.push_back(decoded_bboxes);
}
}

template <typename DeviceContext, typename T>
class DetectionOutputKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* in_loc = context.Input<framework::Tensor>("Loc");
const framework::Tensor* in_conf = context.Input<framework::Tensor>("Conf");
const framework::Tensor* in_priorbox =
const framework::Tensor* in_pb =
context.Input<framework::Tensor>("PriorBox");
auto* out = context.Output<framework::Tensor>("Out");
int num_classes = context.template Attr<int>("num_classes");
int classes = context.template Attr<int>("num_classes");
int top_k = context.template Attr<int>("top_k");
int nms_top_k = context.template Attr<int>("nms_top_k");
int background_label_id = context.template Attr<int>("background_label_id");
int label_id = context.template Attr<int>("background_label_id");
float nms_threshold = context.template Attr<float>("nms_threshold");
float confidence_threshold =
context.template Attr<float>("confidence_threshold");
float conf_th = context.template Attr<float>("confidence_threshold");
size_t batch_size = in_conf->dims()[1];
int conf_sum_size = in_conf->numel();
// for softmax
std::vector<int64_t> conf_shape_softmax_vec(
{conf_sum_size / num_classes, num_classes});
framework::DDim conf_shape_softmax(
framework::make_ddim(conf_shape_softmax_vec));
// for knchw => nhwc
std::vector<int64_t> loc_shape_vec({1, in_loc->dims()[1], in_loc->dims()[3],
in_loc->dims()[4],
in_loc->dims()[2] * in_loc->dims()[0]});
std::vector<int64_t> conf_shape_vec(
{1, in_conf->dims()[1], in_conf->dims()[3], in_conf->dims()[4],
in_conf->dims()[2] * in_conf->dims()[0]});
framework::DDim loc_shape(framework::make_ddim(loc_shape_vec));
framework::DDim conf_shape(framework::make_ddim(conf_shape_vec));
framework::Tensor loc_tensor;
framework::Tensor conf_tensor;
loc_tensor.mutable_data<T>(loc_shape, context.GetPlace());
conf_tensor.mutable_data<T>(conf_shape, context.GetPlace());
// for cpu
std::vector<int64_t> softmax_vec({conf_sum_size / classes, classes});
framework::DDim conf_shape_softmax(framework::make_ddim(softmax_vec));
std::vector<int64_t> l_vec({1, in_loc->dims()[1], in_loc->dims()[3],
in_loc->dims()[4],
in_loc->dims()[2] * in_loc->dims()[0]});
std::vector<int64_t> c_vec({1, in_conf->dims()[1], in_conf->dims()[3],
in_conf->dims()[4],
in_conf->dims()[2] * in_conf->dims()[0]});
framework::DDim loc_shape(framework::make_ddim(l_vec));
framework::DDim conf_shape(framework::make_ddim(c_vec));
framework::Tensor loc;
framework::Tensor conf;
loc.mutable_data<T>(loc_shape, context.GetPlace());
conf.mutable_data<T>(conf_shape, context.GetPlace());
framework::Tensor loc_cpu;
framework::Tensor conf_cpu;
framework::Tensor priorbox_cpu;
const T* priorbox_data = in_priorbox->data<T>();
transpose_fun<DeviceContext, T>(context, *in_loc, &loc_tensor);
transpose_fun<DeviceContext, T>(context, *in_conf, &conf_tensor);
conf_tensor.Resize(conf_shape_softmax);
const T* p_data = in_pb->data<T>();
transpose_fun<DeviceContext, T>(context, *in_loc, &loc);
transpose_fun<DeviceContext, T>(context, *in_conf, &conf);
conf.Resize(conf_shape_softmax);
math::SoftmaxFunctor<DeviceContext, T>()(
context.template device_context<DeviceContext>(), &conf_tensor,
&conf_tensor);
T* loc_data = loc_tensor.data<T>();
T* conf_data = conf_tensor.data<T>();
context.template device_context<DeviceContext>(), &conf, &conf);
T* loc_data = loc.data<T>();
T* conf_data = conf.data<T>();
if (platform::is_gpu_place(context.GetPlace())) {
loc_cpu.mutable_data<T>(loc_tensor.dims(), platform::CPUPlace());
framework::Copy(loc_tensor, platform::CPUPlace(),
context.device_context(), &loc_cpu);
loc_cpu.mutable_data<T>(loc.dims(), platform::CPUPlace());
framework::Copy(loc, platform::CPUPlace(), context.device_context(),
&loc_cpu);
loc_data = loc_cpu.data<T>();
conf_cpu.mutable_data<T>(conf_tensor.dims(), platform::CPUPlace());
framework::Copy(conf_tensor, platform::CPUPlace(),
context.device_context(), &conf_cpu);
conf_cpu.mutable_data<T>(conf.dims(), platform::CPUPlace());
framework::Copy(conf, platform::CPUPlace(), context.device_context(),
&conf_cpu);
conf_data = conf_cpu.data<T>();
priorbox_cpu.mutable_data<T>(in_priorbox->dims(), platform::CPUPlace());
framework::Copy(*in_priorbox, platform::CPUPlace(),
context.device_context(), &priorbox_cpu);
priorbox_data = priorbox_cpu.data<T>();
}
// get decode bboxes
size_t num_priors = in_priorbox->numel() / 8;
std::vector<std::vector<operators::math::BBox<T>>> all_decoded_bboxes;
for (size_t n = 0; n < batch_size; ++n) {
std::vector<operators::math::BBox<T>> decoded_bboxes;
for (size_t i = 0; i < num_priors; ++i) {
size_t prior_offset = i * 8;
size_t loc_pred_offset = n * num_priors * 4 + i * 4;
std::vector<math::BBox<T>> prior_bbox_vec;
math::GetBBoxFromPriorData<T>(priorbox_data + prior_offset, 1,
prior_bbox_vec);
std::vector<std::vector<T>> prior_bbox_var;
math::GetBBoxVarFromPriorData<T>(priorbox_data + prior_offset, 1,
prior_bbox_var);
std::vector<T> loc_pred_data;
for (size_t j = 0; j < 4; ++j)
loc_pred_data.push_back(*(loc_data + loc_pred_offset + j));
math::BBox<T> bbox = math::DecodeBBoxWithVar<T>(
prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data);
decoded_bboxes.push_back(bbox);
}
all_decoded_bboxes.push_back(decoded_bboxes);
priorbox_cpu.mutable_data<T>(in_pb->dims(), platform::CPUPlace());
framework::Copy(*in_pb, platform::CPUPlace(), context.device_context(),
&priorbox_cpu);
p_data = priorbox_cpu.data<T>();
}
size_t num_p = in_pb->numel() / 8;
std::vector<std::vector<operators::math::BBox<T>>> all_de_bboxes;
decode_bboxer<T>(all_de_bboxes, num_p, p_data, loc_data, batch_size);
std::vector<std::map<size_t, std::vector<size_t>>> all_indices;
int num_kept = math::GetDetectionIndices<T>(
conf_data, num_priors, num_classes, background_label_id, batch_size,
confidence_threshold, nms_top_k, nms_threshold, top_k,
all_decoded_bboxes, &all_indices);

conf_data, num_p, classes, label_id, batch_size, conf_th, nms_top_k,
nms_threshold, top_k, all_de_bboxes, &all_indices);
if (num_kept <= 0) {
std::vector<int64_t> out_shape_vec({0, 0});
framework::DDim out_shape(framework::make_ddim(out_shape_vec));
Expand All @@ -154,9 +151,8 @@ class DetectionOutputKernel : public framework::OpKernel<T> {
out_cpu.mutable_data<T>(out->dims(), platform::CPUPlace());
out_data = out_cpu.data<T>();
}
math::GetDetectionOutput<T>(conf_data, num_kept, num_priors, num_classes,
batch_size, all_indices, all_decoded_bboxes,
out_data);
math::GetDetectionOutput<T>(conf_data, num_kept, num_p, classes, batch_size,
all_indices, all_de_bboxes, out_data);
if (platform::is_gpu_place(context.GetPlace())) {
framework::Copy(out_cpu, platform::CUDAPlace(), context.device_context(),
out);
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/v2/fluid/tests/test_detection_output_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from op_test import OpTest


class TestUnpoolOp(OpTest):
class TestDetectionOutputOp(OpTest):
def setUp(self):
self.op_type = "detection_output"
self.init_test_case()
Expand Down