Skip to content

Commit

Permalink
Merge pull request #5602 from chengduoZH/add_cudnn_pool3d
Browse files Browse the repository at this point in the history
Add cudnn pool3d
  • Loading branch information
chengduo committed Nov 20, 2017
2 parents f350be3 + 3bd3cc0 commit 134eaf2
Show file tree
Hide file tree
Showing 14 changed files with 235 additions and 180 deletions.
7 changes: 4 additions & 3 deletions paddle/operators/conv_cudnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops::ConvOpGrad);

REGISTER_OP_CPU_KERNEL(conv_cudnn,
ops::GemmConvKernel<paddle::platform::CPUPlace, float>);
ops::GemmConvKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
conv_cudnn_grad,
ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>);
conv_cudnn_grad, ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvGradKernel<paddle::platform::CPUPlace, double>);
6 changes: 4 additions & 2 deletions paddle/operators/conv_cudnn_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle

REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel<float>);
REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel<float>,
paddle::operators::CudnnConvOpKernel<double>);
REGISTER_OP_GPU_KERNEL(conv_cudnn_grad,
paddle::operators::CudnnConvGradOpKernel<float>);
paddle::operators::CudnnConvGradOpKernel<float>,
paddle::operators::CudnnConvGradOpKernel<double>);
12 changes: 8 additions & 4 deletions paddle/operators/conv_transpose_cudnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,22 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,

REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, double>);

REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad,
ops::ConvTransposeOpGrad);

REGISTER_OP_CPU_KERNEL(
conv3d_transpose_cudnn,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
conv3d_transpose_cudnn_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, double>);
12 changes: 8 additions & 4 deletions paddle/operators/conv_transpose_cudnn_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;

REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn,
ops::CudnnConvTransposeOpKernel<float>);
ops::CudnnConvTransposeOpKernel<float>,
ops::CudnnConvTransposeOpKernel<double>);
REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn_grad,
ops::CudnnConvTransposeGradOpKernel<float>);
ops::CudnnConvTransposeGradOpKernel<float>,
ops::CudnnConvTransposeGradOpKernel<double>);

REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn,
ops::CudnnConvTransposeOpKernel<float>);
ops::CudnnConvTransposeOpKernel<float>,
ops::CudnnConvTransposeOpKernel<double>);
REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn_grad,
ops::CudnnConvTransposeGradOpKernel<float>);
ops::CudnnConvTransposeGradOpKernel<float>,
ops::CudnnConvTransposeGradOpKernel<double>);
16 changes: 14 additions & 2 deletions paddle/operators/pool_cudnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
ops::PoolOpGrad);

REGISTER_OP_CPU_KERNEL(pool2d_cudnn,
ops::PoolKernel<paddle::platform::CPUPlace, float>);
ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>)
ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>)

REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad,
ops::PoolOpGrad);

REGISTER_OP_CPU_KERNEL(pool3d_cudnn,
ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool3d_cudnn_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>)
27 changes: 23 additions & 4 deletions paddle/operators/pool_cudnn_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc;
ScopedPoolingDescriptor pool_desc;
DataLayout layout = DataLayout::kNCHW;
DataLayout layout;

if (strides.size() == 2U) {
layout = DataLayout::kNCHW;
} else {
layout = DataLayout::kNCDHW;
}

cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize2int(input->dims()));
Expand Down Expand Up @@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc;
ScopedPoolingDescriptor pool_desc;
DataLayout layout = DataLayout::kNCHW;
DataLayout layout;

if (strides.size() == 2U) {
layout = DataLayout::kNCHW;
} else {
layout = DataLayout::kNCDHW;
}

cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize2int(input->dims()));
Expand Down Expand Up @@ -150,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {

namespace ops = paddle::operators;

REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>);
REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>);
REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>,
ops::PoolCudnnOpKernel<double>);
REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
ops::PoolCudnnGradOpKernel<double>);

REGISTER_OP_GPU_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel<float>,
ops::PoolCudnnOpKernel<double>);
REGISTER_OP_GPU_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
ops::PoolCudnnGradOpKernel<double>);
12 changes: 8 additions & 4 deletions paddle/operators/pool_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad,
ops::PoolOpGrad);

REGISTER_OP_CPU_KERNEL(pool2d,
ops::PoolKernel<paddle::platform::CPUPlace, float>);
ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool2d_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>)
ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>)

REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad,
ops::PoolOpGrad);

REGISTER_OP_CPU_KERNEL(pool3d,
ops::PoolKernel<paddle::platform::CPUPlace, float>);
ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool3d_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>);
ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>);
12 changes: 8 additions & 4 deletions paddle/operators/pool_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ limitations under the License. */
namespace ops = paddle::operators;

REGISTER_OP_GPU_KERNEL(pool2d,
ops::PoolKernel<paddle::platform::GPUPlace, float>);
ops::PoolKernel<paddle::platform::GPUPlace, float>,
ops::PoolKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(pool2d_grad,
ops::PoolGradKernel<paddle::platform::GPUPlace, float>);
ops::PoolGradKernel<paddle::platform::GPUPlace, float>,
ops::PoolGradKernel<paddle::platform::GPUPlace, double>);

REGISTER_OP_GPU_KERNEL(pool3d,
ops::PoolKernel<paddle::platform::GPUPlace, float>);
ops::PoolKernel<paddle::platform::GPUPlace, float>,
ops::PoolKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(pool3d_grad,
ops::PoolGradKernel<paddle::platform::GPUPlace, float>);
ops::PoolGradKernel<paddle::platform::GPUPlace, float>,
ops::PoolGradKernel<paddle::platform::GPUPlace, double>);
12 changes: 8 additions & 4 deletions paddle/operators/pool_with_index_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,18 +250,22 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp,

REGISTER_OP_CPU_KERNEL(
max_pool2d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float>);
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float>,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
max_pool2d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float>)
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, double>)

REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad,
ops::MaxPoolWithIndexOpGrad);

REGISTER_OP_CPU_KERNEL(
max_pool3d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float>);
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float>,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
max_pool3d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float>)
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, double>)
12 changes: 8 additions & 4 deletions paddle/operators/pool_with_index_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,18 @@ namespace ops = paddle::operators;

REGISTER_OP_GPU_KERNEL(
max_pool2d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float>);
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float>,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(
max_pool2d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float>)
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, double>)

REGISTER_OP_GPU_KERNEL(
max_pool3d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float>);
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float>,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(
max_pool3d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float>)
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, double>)
6 changes: 4 additions & 2 deletions paddle/platform/cudnn_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,15 @@ class ScopedConvolutionDescriptor {
PADDLE_ENFORCE_EQ(pads.size(), strides.size());
PADDLE_ENFORCE_EQ(pads.size(), dilations.size());

#if CUDNN_VERSION < 6000
#if !CUDNN_VERSION_MIN(6, 0, 0)
// cudnn v5 does not support dilation conv, the argument is called upscale
// instead of dilations and it is must be one.
for (size_t i = 0; i < dilations.size(); ++i) {
PADDLE_ENFORCE_EQ(
dilations[i], 1,
"Dilations conv is not supported in this cuDNN version");
"Dilations conv is not supported in this cuDNN version(%d.%d.%d).",
CUDNN_VERSION / 1000, CUDNN_VERSION % 1000 / 100,
CUDNN_VERSION % 100);
}
#endif

Expand Down
34 changes: 34 additions & 0 deletions paddle/platform/cudnn_helper_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
EXPECT_EQ(strides[2], 6);
EXPECT_EQ(strides[1], 36);
EXPECT_EQ(strides[0], 144);

// test tensor5d: ScopedTensorDescriptor
ScopedTensorDescriptor tensor5d_desc;
std::vector<int> shape_5d = {2, 4, 6, 6, 6};
auto desc_5d = tensor5d_desc.descriptor<float>(DataLayout::kNCDHW, shape_5d);

std::vector<int> dims_5d(5);
std::vector<int> strides_5d(5);
paddle::platform::dynload::cudnnGetTensorNdDescriptor(
desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data());

EXPECT_EQ(nd, 5);
for (size_t i = 0; i < dims_5d.size(); ++i) {
EXPECT_EQ(dims_5d[i], shape_5d[i]);
}
EXPECT_EQ(strides_5d[4], 1);
EXPECT_EQ(strides_5d[3], 6);
EXPECT_EQ(strides_5d[2], 36);
EXPECT_EQ(strides_5d[1], 216);
EXPECT_EQ(strides_5d[0], 864);
}

TEST(CudnnHelper, ScopedFilterDescriptor) {
Expand All @@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
for (size_t i = 0; i < shape.size(); ++i) {
EXPECT_EQ(kernel[i], shape[i]);
}

ScopedFilterDescriptor filter_desc_4d;
std::vector<int> shape_4d = {2, 3, 3, 3};
auto desc_4d = filter_desc.descriptor<float>(DataLayout::kNCDHW, shape_4d);

std::vector<int> kernel_4d(4);
paddle::platform::dynload::cudnnGetFilterNdDescriptor(
desc_4d, 4, &type, &format, &nd, kernel_4d.data());

EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format);
EXPECT_EQ(nd, 4);
for (size_t i = 0; i < shape_4d.size(); ++i) {
EXPECT_EQ(kernel_4d[i], shape_4d[i]);
}
}

TEST(CudnnHelper, ScopedConvolutionDescriptor) {
Expand Down
Loading

0 comments on commit 134eaf2

Please sign in to comment.