
Commit

pool2d_coonvert_ut
fengshuai committed Feb 14, 2022
1 parent 9c2cee1 commit d61e9e8
Showing 5 changed files with 203 additions and 143 deletions.
146 changes: 92 additions & 54 deletions paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -106,6 +106,9 @@ class Pool2dOpConverter : public OpConverter {
reduce_operation = nvinfer1::ReduceOperation::kAVG;
plugin_pool_type = plugin::PoolPlugin::PoolType::avg;
}
if (global_pooling || adaptive) {
std::fill(paddings.begin(), paddings.end(), 0);
}

if (padding_algorithm == "VALID") {
std::fill(paddings.begin(), paddings.end(), 0);
@@ -136,6 +139,46 @@ class Pool2dOpConverter : public OpConverter {
#endif
}

std::vector<int> real_paddings = paddings;
for (int i = 0; i < 2; ++i) {
int copy_pad = *(paddings.begin() + i);
real_paddings.insert(real_paddings.begin() + 2 * i + 1, copy_pad);
}
// SAME
if (padding_algorithm == "SAME") {
// expand
for (int i = 0; i < 2; ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
// compute
for (int i = 0; i < 2; ++i) {
int out_size = (input_shape.d[2 + i] + strides[i] - 1) / strides[i];
int pad_sum = std::max(
(out_size - 1) * strides[i] + ksize[i] - input_shape.d[2 + i], 0);
int pad_0 = pad_sum / 2;
int pad_1 = pad_sum - pad_0;
paddings[i * 2] = pad_0;
paddings[i * 2 + 1] = pad_1;
}
real_paddings = paddings;
// slice
for (int i = 0; i < 2; ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
// VALID
if (padding_algorithm == "VALID") {
std::fill(real_paddings.begin(), real_paddings.end(), 0);
}

if (global_pooling == true && !engine_->with_dynamic_shape()) {
nv_ksize.d[0] = input_shape.d[input_dims - 2];
nv_ksize.d[1] = input_shape.d[input_dims - 1];
ksize[0] = input_shape.d[input_dims - 2];
ksize[1] = input_shape.d[input_dims - 1];
}

if (engine_->with_dynamic_shape()) {
if (!adaptive && !global_pooling && !ceil_mode) {
// input_shape.d < 0 means we can't get shape info here.
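As a reference for the SAME branch above, here is a minimal standalone sketch of the padding arithmetic; the input size, kernel, and stride are hypothetical values chosen only to make the formulas concrete:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical example for one spatial dimension: input 7, kernel 3, stride 2.
  int in = 7, ksize = 3, stride = 2;
  // SAME: the output size is the input size divided by the stride, rounded up.
  int out = (in + stride - 1) / stride;                        // ceil(7 / 2) = 4
  // Total padding needed so the output really has `out` positions.
  int pad_sum = std::max((out - 1) * stride + ksize - in, 0);  // 3 * 2 + 3 - 7 = 2
  int pad_0 = pad_sum / 2;      // padding before: 1
  int pad_1 = pad_sum - pad_0;  // padding after:  1
  std::printf("out=%d pad_before=%d pad_after=%d\n", out, pad_0, pad_1);
  return 0;
}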
@@ -173,15 +216,15 @@ class Pool2dOpConverter : public OpConverter {
pool_layer->setPaddingMode(nvinfer1::PaddingMode::kEXPLICIT_ROUND_UP);
}
layer = pool_layer;
} else if (global_pooling) {
} else if (global_pooling && !adaptive) {
auto *reduce_layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *input1,
reduce_operation, 12, true);
layer = reduce_layer;
} else {
#if IS_TRT_VERSION_GE(6000)
plugin::PoolPluginDynamic *plugin =
new plugin::PoolPluginDynamic(ceil_mode, pool_type, adaptive, ksize,
strides, paddings, global_pooling);
plugin::PoolPluginDynamic *plugin = new plugin::PoolPluginDynamic(
ceil_mode, pool_type, adaptive, exclusive, ksize, strides, paddings,
global_pooling);
layer = engine_->AddDynamicPlugin(&input1, 1, plugin);
#endif
}
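The value 12 passed to the Reduce layer above is an axis bitmask; a small sketch of how that value selects the spatial axes, assuming the usual NCHW layout (H is dimension 2, W is dimension 3):

#include <cstdint>
#include <cstdio>

int main() {
  // Bit i set => reduce over dimension i. For NCHW, H = dim 2 and W = dim 3.
  std::uint32_t axes = (1u << 2) | (1u << 3);  // == 12
  std::printf("reduce axes bitmask = %u\n", axes);
  // Reducing H and W with kAVG (or kMAX) and keepDimensions = true turns an
  // N x C x H x W tensor into N x C x 1 x 1, i.e. global average (or max) pooling.
  return 0;
}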
@@ -195,21 +238,13 @@ class Pool2dOpConverter : public OpConverter {
return;
}

if (global_pooling == true) {
nv_ksize.d[0] = input_shape.d[input_dims - 2];
nv_ksize.d[1] = input_shape.d[input_dims - 1];
if (global_pooling == true && adaptive == false) {
auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1,
nv_pool_type, nv_ksize);
PADDLE_ENFORCE_NOT_NULL(
pool_layer, platform::errors::Fatal(
"trt pool layer in converter could not be created."));
auto output_name = op_desc.Output("Out")[0];
pool_layer->setStride(nv_strides);
pool_layer->setPadding(nv_paddings);
if (padding_algorithm == "SAME") {
pool_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
}
pool_layer->setAverageCountExcludesPadding(exclusive);
pool_layer->setName(("pool2d (Output: " + output_name + ")").c_str());
pool_layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, pool_layer->getOutput(0));
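The static-shape global-pooling branch above simply sets the kernel to the full spatial extent; under the standard pooling output-size formula that collapses H and W to 1. A small sketch of that formula with hypothetical sizes:

#include <cstdio>

// Standard floor-mode pooling output size for one spatial dimension.
int PoolOutSize(int in, int ksize, int stride, int pad_sum) {
  return (in + pad_sum - ksize) / stride + 1;
}

int main() {
  int h = 14, w = 14;  // hypothetical input spatial size
  // Kernel covers the whole plane, no padding: the output is 1 x 1.
  std::printf("out_h=%d out_w=%d\n",
              PoolOutSize(h, /*ksize=*/h, /*stride=*/1, /*pad_sum=*/0),
              PoolOutSize(w, /*ksize=*/w, /*stride=*/1, /*pad_sum=*/0));
  return 0;
}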
@@ -222,58 +257,61 @@ class Pool2dOpConverter : public OpConverter {

if (!adaptive) {
if (ceil_mode) {
nvinfer1::DimsHW pre_pad(0, 0);
nvinfer1::DimsHW post_pad(0, 0);
// If ceil mode is true, we will pad the appropriate size to the input.
DealCeilMode(input_shape, ksize, strides, paddings, &pre_pad, &post_pad,
input_dims);
auto *pad_layer =
TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, pre_pad, post_pad);

std::vector<int> input_shape_v;
for (int i = 0; i < input_dims; i++) {
input_shape_v.push_back(input_shape.d[i]);
}
plugin::PoolPlugin *plugin = new plugin::PoolPlugin(
ceil_mode, plugin_pool_type, adaptive, exclusive, ksize, strides,
paddings, input_shape_v, real_paddings);
auto *pool_layer = engine_->AddPlugin(&input1, 1, plugin);
PADDLE_ENFORCE_NOT_NULL(
pad_layer, platform::errors::Fatal(
"Pad layer in poolOp converter could not be "
"created. The pointer to pad layer is `NULL`."));
input1 = pad_layer->getOutput(0);
}
pool_layer,
platform::errors::Fatal(
"trt pool plugin layer in converter could not be created."));
layer = pool_layer;
} else {
#if IS_TRT_VERSION_GE(8000)
// Exclude padding pixels from the average mean is not supported well by
// TRT
// so enable padding for trt8.0 above.
if ((g_post_pad.w() > 0 || g_post_pad.h() > 0) &&
(padding_algorithm != "SAME") && !ceil_mode) {
auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1,
g_pre_pad, g_post_pad);
PADDLE_ENFORCE_NOT_NULL(
pad_layer, platform::errors::Fatal(
"Pad layer in poolOp converter could not be "
"created. The pointer to pad layer is `NULL`."));
input1 = pad_layer->getOutput(0);
}
// Exclude padding pixels from the average mean is not supported well by
// TRT
// so enable padding for trt8.0 above.
if ((g_post_pad.w() > 0 || g_post_pad.h() > 0) &&
(padding_algorithm != "SAME") && !ceil_mode) {
auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1,
g_pre_pad, g_post_pad);
PADDLE_ENFORCE_NOT_NULL(
pad_layer, platform::errors::Fatal(
"Pad layer in poolOp converter could not be "
"created. The pointer to pad layer is `NULL`."));
input1 = pad_layer->getOutput(0);
}
#endif
auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1,
nv_pool_type, nv_ksize);
PADDLE_ENFORCE_NOT_NULL(
pool_layer, platform::errors::Fatal(
"trt pool layer in converter could not be created."));
pool_layer->setStride(nv_strides);
pool_layer->setPadding(nv_paddings);
if (padding_algorithm == "SAME") {
pool_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1,
nv_pool_type, nv_ksize);
PADDLE_ENFORCE_NOT_NULL(
pool_layer,
platform::errors::Fatal(
"trt pool layer in converter could not be created."));
pool_layer->setStride(nv_strides);
pool_layer->setPadding(nv_paddings);
if (padding_algorithm == "SAME") {
pool_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
}
pool_layer->setAverageCountExcludesPadding(exclusive);
layer = pool_layer;
}
pool_layer->setAverageCountExcludesPadding(exclusive);
layer = pool_layer;

} else {
// Average pooling needs to exclude the padding pixels from the average
// mean.
// It is not supported well by TRT, we use a plugin here.
// It is not supported well by TRT, we use a plugin here
std::vector<int> input_shape_v;
for (int i = 0; i < input_dims; i++) {
input_shape_v.push_back(input_shape.d[i]);
}
plugin::PoolPlugin *plugin =
new plugin::PoolPlugin(ceil_mode, plugin_pool_type, adaptive, ksize,
strides, paddings, input_shape_v);
plugin::PoolPlugin *plugin = new plugin::PoolPlugin(
ceil_mode, plugin_pool_type, adaptive, exclusive, ksize, strides,
paddings, input_shape_v, real_paddings);
auto *pool_layer = engine_->AddPlugin(&input1, 1, plugin);
PADDLE_ENFORCE_NOT_NULL(
pool_layer,
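For the ceil-mode branch above, the usual trick, which is what a helper like DealCeilMode is expected to compute (the real helper may differ in its details), is to enlarge the trailing padding so that TensorRT's floor-mode formula reproduces the ceil-mode output size. A hedged sketch for one spatial dimension with hypothetical sizes:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical sizes: input 8, kernel 3, stride 2, symmetric padding 0.
  int in = 8, ksize = 3, stride = 2, pad = 0;
  int out_ceil  = (in + 2 * pad - ksize + stride - 1) / stride + 1;  // 4
  int out_floor = (in + 2 * pad - ksize) / stride + 1;               // 3
  // Extra trailing padding so that floor mode yields the ceil-mode size.
  int extra = std::max((out_ceil - 1) * stride + ksize - in - 2 * pad, 0);  // 1
  std::printf("out_ceil=%d out_floor=%d extra_post_pad=%d\n",
              out_ceil, out_floor, extra);
  return 0;
}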
61 changes: 53 additions & 8 deletions paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu
@@ -35,6 +35,36 @@ nvinfer1::Dims PoolPlugin::getOutputDimensions(int index,
return output_dims;
}

size_t PoolPlugin::getSerializationSize() const TRT_NOEXCEPT {
return getBaseSerializationSize() + SerializedSize(ceil_mode_) +
SerializedSize(pool_type_) + SerializedSize(adaptive_) +
SerializedSize(exclusive_) + SerializedSize(ksize_) +
SerializedSize(strides_) + SerializedSize(paddings_) +
SerializedSize(real_paddings_) + SerializedSize(input_shape_) +
SerializedSize(output_shape_);
}

// TRT will call this func when we need to serialize the configuration of
// tensorrt.
void PoolPlugin::serialize(void *buffer) const TRT_NOEXCEPT {
serializeBase(buffer);
SerializeValue(&buffer, ceil_mode_);
SerializeValue(&buffer, pool_type_);
SerializeValue(&buffer, adaptive_);
SerializeValue(&buffer, exclusive_);
SerializeValue(&buffer, ksize_);
SerializeValue(&buffer, strides_);
SerializeValue(&buffer, paddings_);
SerializeValue(&buffer, real_paddings_);
SerializeValue(&buffer, input_shape_);
SerializeValue(&buffer, output_shape_);
}

PoolPlugin *PoolPlugin::clone() const TRT_NOEXCEPT {
return new PoolPlugin(ceil_mode_, pool_type_, adaptive_, exclusive_, ksize_,
strides_, paddings_, input_shape_, real_paddings_);
}

int PoolPlugin::enqueue(int batchSize, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace,
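The new exclusive_ and real_paddings_ fields added to getSerializationSize and serialize above have to be read back in exactly the same order when the plugin is rebuilt from its byte buffer. A minimal standalone sketch of that write/read-order contract, using stand-in helpers rather than the real SerializeValue/DeserializeValue utilities:

#include <cstdio>
#include <cstring>

// Stand-ins for the serialization helpers: a flat buffer written and read
// field by field, in the same order on both sides.
template <typename T>
void Write(char** buf, const T& v) {
  std::memcpy(*buf, &v, sizeof(T));
  *buf += sizeof(T);
}
template <typename T>
void Read(const char** buf, T* v) {
  std::memcpy(v, *buf, sizeof(T));
  *buf += sizeof(T);
}

int main() {
  char storage[64];
  char* w = storage;
  bool ceil_mode = true, adaptive = false, exclusive = true;
  Write(&w, ceil_mode);
  Write(&w, adaptive);
  Write(&w, exclusive);

  const char* r = storage;
  bool ceil_mode2, adaptive2, exclusive2;
  // Reading in a different order than the writes would silently swap field values.
  Read(&r, &ceil_mode2);
  Read(&r, &adaptive2);
  Read(&r, &exclusive2);
  std::printf("%d %d %d\n", ceil_mode2, adaptive2, exclusive2);
  return 0;
}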
@@ -59,14 +89,15 @@ int PoolPlugin::enqueue(int batchSize, const void *const *inputs,
paddle::operators::math::MaxPool<float>, float>
pool2d_forward;
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
paddings_, true, adaptive_, odatas[0], stream, pool_process);
paddings_, true, false, odatas[0], stream, pool_process);
} else if (pool_type_ == PoolType::avg) {
paddle::operators::math::AvgPool<float> pool_process;
paddle::operators::math::Pool2dDirectCUDAFunctor<
paddle::operators::math::AvgPool<float>, float>
pool2d_forward;
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
paddings_, true, adaptive_, odatas[0], stream, pool_process);
paddings_, exclusive_, adaptive_, odatas[0], stream,
pool_process);
}

return cudaGetLastError() != cudaSuccess;
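The exclusive_ flag threaded into the average-pooling call above decides whether padded positions count toward the averaging denominator. A small standalone example of the difference on a single window, with hypothetical values:

#include <cstdio>

int main() {
  // A 3-wide window at the left edge of the input: one padded zero plus two
  // real values (hypothetical numbers).
  float window[3] = {0.0f, 4.0f, 2.0f};  // window[0] is padding
  int valid = 2;                         // number of non-padded elements
  float sum = window[0] + window[1] + window[2];
  float inclusive_avg = sum / 3;      // padding counted in the denominator: 2.0
  float exclusive_avg = sum / valid;  // padding excluded:                   3.0
  std::printf("inclusive=%.1f exclusive=%.1f\n", inclusive_avg, exclusive_avg);
  return 0;
}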
@@ -82,6 +113,7 @@ PoolPluginDynamic::PoolPluginDynamic(void const *serialData,
DeserializeValue(&serialData, &serialLength, &pool_type);
pool_type_ = std::string(pool_type);
DeserializeValue(&serialData, &serialLength, &adaptive_);
DeserializeValue(&serialData, &serialLength, &exclusive_);
DeserializeValue(&serialData, &serialLength, &ksize_);
DeserializeValue(&serialData, &serialLength, &strides_);
DeserializeValue(&serialData, &serialLength, &paddings_);
@@ -90,21 +122,27 @@ PoolPluginDynamic::PoolPluginDynamic(void const *serialData,

size_t PoolPluginDynamic::getSerializationSize() const TRT_NOEXCEPT {
return SerializedSize(ceil_mode_) + SerializedSize(pool_type_.c_str()) +
SerializedSize(adaptive_) + SerializedSize(ksize_) +
SerializedSize(strides_) + SerializedSize(paddings_) +
SerializedSize(is_global_);
SerializedSize(adaptive_) + SerializedSize(exclusive_) +
SerializedSize(ksize_) + SerializedSize(strides_) +
SerializedSize(paddings_) + SerializedSize(is_global_);
}

void PoolPluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT {
SerializeValue(&buffer, ceil_mode_);
SerializeValue(&buffer, pool_type_.c_str());
SerializeValue(&buffer, adaptive_);
SerializeValue(&buffer, exclusive_);
SerializeValue(&buffer, ksize_);
SerializeValue(&buffer, strides_);
SerializeValue(&buffer, paddings_);
SerializeValue(&buffer, is_global_);
}

nvinfer1::IPluginV2DynamicExt *PoolPluginDynamic::clone() const TRT_NOEXCEPT {
return new PoolPluginDynamic(ceil_mode_, pool_type_, adaptive_, exclusive_,
ksize_, strides_, paddings_, is_global_);
}

nvinfer1::DimsExprs PoolPluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT {
@@ -117,11 +155,14 @@ nvinfer1::DimsExprs PoolPluginDynamic::getOutputDimensions(
platform::errors::InvalidArgument("The channel dimension should be "
"static, but we found it's dynamic."));
nvinfer1::DimsExprs output(inputs[0]);
if (is_global_) {
if (is_global_ && !adaptive_) {
output.d[2] = expr_builder.constant(1);
output.d[3] = expr_builder.constant(1);
return output;
}
if (is_global_ && adaptive_) {
return inputs[0];
}
if (adaptive_) {
output.d[2] = expr_builder.constant(ksize_[0]);
output.d[3] = expr_builder.constant(ksize_[1]);
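A plain sketch of the output-shape cases visible in the branch above, for one spatial dimension; the final non-adaptive, non-global branch falls below the fold of this hunk, so the floor-mode formula used for it here is an assumption:

#include <cstdio>

// Output spatial size for one dimension, mirroring the branches above.
int OutDim(int in, int ksize, int stride, int pad_sum,
           bool is_global, bool adaptive) {
  if (is_global && !adaptive) return 1;   // global pooling collapses H/W to 1
  if (is_global && adaptive) return in;   // global + adaptive: shape passes through
  if (adaptive) return ksize;             // adaptive: output size is the target ksize
  // Regular pooling (assumed floor-mode formula for the elided branch).
  return (in + pad_sum - ksize) / stride + 1;
}

int main() {
  std::printf("%d %d %d %d\n",
              OutDim(14, 3, 2, 0, true, false),    // 1
              OutDim(14, 3, 2, 0, true, true),     // 14
              OutDim(14, 7, 2, 0, false, true),    // 7
              OutDim(14, 3, 2, 0, false, false));  // 6
  return 0;
}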
@@ -245,21 +286,25 @@ int PoolPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
output_shape[2] = data_dim[0];
output_shape[3] = data_dim[1];
}
if (adaptive_) {
output_shape[2] = h;
output_shape[3] = w;
}

if (pool_type_ == "max") {
paddle::operators::math::MaxPool<float> pool_process;
paddle::operators::math::Pool2dDirectCUDAFunctor<
paddle::operators::math::MaxPool<float>, float>
pool2d_forward;
pool2d_forward(input, input_shape, output_shape, ksize, strides_, paddings,
true, adaptive_, output, stream, pool_process);
true, false, output, stream, pool_process);
} else if (pool_type_ == "avg") {
paddle::operators::math::AvgPool<float> pool_process;
paddle::operators::math::Pool2dDirectCUDAFunctor<
paddle::operators::math::AvgPool<float>, float>
pool2d_forward;
pool2d_forward(input, input_shape, output_shape, ksize, strides_, paddings,
true, adaptive_, output, stream, pool_process);
exclusive_, adaptive_, output, stream, pool_process);
}

return cudaGetLastError() != cudaSuccess;

1 comment on commit d61e9e8

@paddle-bot-old

Congratulation! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉
