diff --git a/paddle/function/ConvOp.h b/paddle/function/ConvOp.h index baf78bc6c88d0..062ea25a11470 100644 --- a/paddle/function/ConvOp.h +++ b/paddle/function/ConvOp.h @@ -61,6 +61,7 @@ class ConvFunctionBase : public FunctionBase { // function arguments strides_ = config.get>("strides"); paddings_ = config.get>("paddings"); + dilations_ = config.get>("dilations"); groups_ = config.get("groups"); // number of inputs and outputs @@ -118,6 +119,7 @@ class ConvFunctionBase : public FunctionBase { std::vector strides_; std::vector paddings_; + std::vector dilations_; /// Group size, refer to grouped convolution in /// Alex Krizhevsky's paper: when group=2, the first half of the @@ -133,6 +135,10 @@ class ConvFunctionBase : public FunctionBase { inline int paddingW() const { return paddings_[1]; } + inline int dilationH() const { return dilations_[0]; } + + inline int dilationW() const { return dilations_[1]; } + // A temporary memory in convolution calculation. MemoryHandlePtr memory_; diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h index cb02a96d0dbef..d8d3c792df236 100644 --- a/paddle/function/ConvOpTest.h +++ b/paddle/function/ConvOpTest.h @@ -79,45 +79,59 @@ void Convolution(const std::string& conv1, if (outputChannels < inputChannels) continue; for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; + for (size_t dilation : {1, 3}) { + if (padding >= filterSize) break; + size_t filterS = (filterSize - 1) * dilation + 1; - // NNPACK only supports stride = 1 if batchSize > 1 - if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && - batchSize > 1 && stride > 1) - break; + if (inputSize + 2 * padding < filterS) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize << " stride=" << stride - << " padding=" << padding; + if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" || + conv1 == "NNPACKConv-CPU" || + conv2 == "NNPACKConv-CPU") && + dilation > 1) + break; - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", (size_t)1) - .set("algo", (std::string) "auto")); + // NNPACK only supports stride = 1 if batchSize > 1 + if ((conv1 == "NNPACKConv-CPU" || + conv2 == "NNPACKConv-CPU") && + batchSize > 1 && stride > 1) + break; - TensorShape input{ - batchSize, inputChannels, inputSize, inputSize}; - TensorShape filter{ - outputChannels, inputChannels, filterSize, filterSize}; - TensorShape output{ - batchSize, outputChannels, outputSize, outputSize}; + size_t outputSize = + (inputSize - filterS + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize + << " stride=" << stride << " padding=" << padding; - function(test, input, filter, output); + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + std::vector dilations = {dilation, dilation}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("dilations", dilations) + .set("groups", (size_t)1) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{ + outputChannels, inputChannels, filterSize, filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } } } } @@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1, for (size_t outputChannels : {7}) { size_t stride = 1; size_t padding = 0; + size_t dilation = 1; size_t outputHeight = (inputHeight - filterHeight + 2 * padding + stride) / stride; @@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1, std::vector paddings = {padding, padding}; std::vector strides = {stride, stride}; + std::vector dilations = {dilation, dilation}; Compare2Function test( conv1, conv2, @@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1, .set("paddings", paddings) .set("strides", strides) .set("groups", (size_t)1) + .set("dilations", dilations) .set("algo", (std::string) "auto")); TensorShape input{ @@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1, std::vector paddings = {padding, padding}; std::vector strides = {stride, stride}; + std::vector dilations = {1, 1}; size_t groups = inputChannels; Compare2Function test( conv1, @@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1, .set("paddings", paddings) .set("strides", strides) .set("groups", groups) + .set("dilations", dilations) .set("algo", (std::string) "auto")); TensorShape input{ diff --git a/paddle/function/GemmConvOp.cpp b/paddle/function/GemmConvOp.cpp index bdb56ddac38b9..8d34eee886a62 100644 --- a/paddle/function/GemmConvOp.cpp +++ b/paddle/function/GemmConvOp.cpp @@ -100,7 +100,9 @@ class GemmConvFunction : public ConvFunctionBase { strideH(), strideW(), paddingH(), - paddingW()); + paddingW(), + dilationH(), + dilationW()); } else { colData = inputData + g * inputOffset; } @@ -223,7 +225,9 @@ class GemmConvGradInputFunction : public ConvFunctionBase { strideH(), strideW(), paddingH(), - paddingW()); + paddingW(), + dilationH(), + dilationW()); } } inputGrad += inputChannels * inputHeight * inputWidth; @@ -310,7 +314,9 @@ class GemmConvGradFilterFunction : public ConvFunctionBase { strideH(), strideW(), paddingH(), - paddingW()); + paddingW(), + dilationH(), + dilationW()); } else { colData = inputData + g * inputOffset; } diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h index 1e0cff436ff60..0c37fc972484b 100644 --- a/paddle/function/Im2Col.h +++ b/paddle/function/Im2Col.h @@ -78,7 +78,9 @@ class Im2ColFunctor { int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth); + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1); }; template @@ -91,7 +93,9 @@ class Col2ImFunctor { int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth); + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1); }; } // namespace paddle diff --git a/paddle/function/Im2ColOp.cpp b/paddle/function/Im2ColOp.cpp index b7d1eb1eded7a..f864d42f80752 100644 --- a/paddle/function/Im2ColOp.cpp +++ b/paddle/function/Im2ColOp.cpp @@ -31,7 +31,9 @@ class Im2ColFunctor { int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -47,8 +49,8 @@ class Im2ColFunctor { int c_im = c / filterWidth / filterHeight; for (int h = 0; h < outputHeight; ++h) { for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset; - int imColIdx = w * strideWidth + wOffset; + int imRowIdx = h * strideHeight + hOffset * dilationHeight; + int imColIdx = w * strideWidth + wOffset * dilationWidth; if ((imRowIdx - paddingHeight) < 0 || (imRowIdx - paddingHeight) >= inputHeight || (imColIdx - paddingWidth) < 0 || @@ -81,7 +83,9 @@ class Col2ImFunctor { int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -97,8 +101,8 @@ class Col2ImFunctor { int c_im = c / filterWidth / filterHeight; for (int h = 0; h < outputHeight; ++h) { for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset; - int imColIdx = w * strideWidth + wOffset; + int imRowIdx = h * strideHeight + hOffset * dilationHeight; + int imColIdx = w * strideWidth + wOffset * dilationWidth; if ((imRowIdx - paddingHeight) >= 0 && (imRowIdx - paddingHeight) < inputHeight && (imColIdx - paddingWidth) >= 0 && @@ -134,7 +138,9 @@ class Im2ColFunctor { int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -147,9 +153,10 @@ class Im2ColFunctor { for (int channel = 0; channel < inputChannels; ++channel) { for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterW = 0; filterW < filterWidth; ++filterW) { - int imRowOffset = - outputH * strideHeight + filterH - paddingHeight; - int imColOffset = outputW * strideWidth + filterW - paddingWidth; + int imRowOffset = outputH * strideHeight + + filterH * dilationHeight - paddingHeight; + int imColOffset = outputW * strideWidth + + filterW * dilationWidth - paddingWidth; int colDataOffset = (((outputH * outputWidth + outputW) * inputChannels + channel) * @@ -189,7 +196,9 @@ class Col2ImFunctor { int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -202,9 +211,10 @@ class Col2ImFunctor { for (int channel = 0; channel < inputChannels; ++channel) { for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterW = 0; filterW < filterWidth; ++filterW) { - int imRowOffset = - outputH * strideHeight + filterH - paddingHeight; - int imColOffset = outputW * strideWidth + filterW - paddingWidth; + int imRowOffset = outputH * strideHeight + + filterH * dilationHeight - paddingHeight; + int imColOffset = outputW * strideWidth + + filterW * dilationWidth - paddingWidth; int colDataOffset = (((outputH * outputWidth + outputW) * inputChannels + channel) * diff --git a/paddle/function/Im2ColOpGpu.cu b/paddle/function/Im2ColOpGpu.cu index bd98610498b1a..71da11b95557d 100644 --- a/paddle/function/Im2ColOpGpu.cu +++ b/paddle/function/Im2ColOpGpu.cu @@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im, int strideW, int paddingH, int paddingW, + int dilationH, + int dilationW, int height_col, int width_col, T* data_col) { @@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im, data_col += (channel_out * height_col + h_out) * width_col + w_out; for (int i = 0; i < blockH; ++i) { for (int j = 0; j < blockW; ++j) { - int rIdx = int(h_in + i); - int cIdx = int(w_in + j); + int rIdx = int(h_in + i * dilationH); + int cIdx = int(w_in + j * dilationW); if ((rIdx - (int)paddingH) >= (int)height || (rIdx - (int)paddingH) < 0 || (cIdx - (int)paddingW) >= (int)width || @@ -77,7 +79,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -102,6 +106,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth, colData); @@ -121,6 +127,8 @@ __global__ void col2im(size_t n, size_t strideW, size_t paddingH, size_t paddingW, + size_t dilationH, + size_t dilationW, size_t height_col, size_t width_col, T* data_im) { @@ -131,23 +139,34 @@ __global__ void col2im(size_t n, int w = int(index % width); int h = int((index / width) % height); int c = int(index / (width * height)); + int filterH = (blockH - 1) * dilationH + 1; + int filterW = (blockW - 1) * dilationW + 1; + if ((w - (int)paddingW) >= 0 && (w - (int)paddingW) < (width - 2 * paddingW) && (h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) { // compute the start and end of the output int w_col_start = - (w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1; + (w < (int)filterW) ? 0 : (w - int(filterW)) / (int)strideW + 1; int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col)); int h_col_start = - (h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1; + (h < (int)filterH) ? 0 : (h - (int)filterH) / (int)strideH + 1; int h_col_end = min(int(h / strideH + 1), int(height_col)); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { // the col location: [c * width * height + h_out, w_out] - int c_col = int(c * blockH * blockW) + - (h - h_col * (int)strideH) * (int)blockW + - (w - w_col * (int)strideW); - val += data_col[(c_col * height_col + h_col) * width_col + w_col]; + int h_k = (h - h_col * strideH); + int w_k = (w - w_col * strideW); + if (h_k % dilationH == 0 && w_k % dilationW == 0) { + h_k /= dilationH; + w_k /= dilationW; + int c_col = + (((c * blockH + h_k) * blockW + w_k) * height_col + h_col) * + width_col + + w_col; + val += data_col[c_col]; + } } } h -= paddingH; @@ -173,7 +192,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -205,6 +226,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth, imData); @@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData, int strideWidth, int paddingHeight, int paddingWidth, + int dilationHeight, + int dilationWidth, int outputHeight, int outputWidth) { int swId = blockIdx.x; @@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData, channelId += blockDim.z) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = idx + swId * strideWidth - paddingWidth; - int heightOffset = idy + shId * strideHeight - paddingHeight; + int widthOffset = + idx * dilationHeight + swId * strideWidth - paddingWidth; + int heightOffset = + idy * dilationWidth + shId * strideHeight - paddingHeight; int imOffset = widthOffset + heightOffset * inputWidth + channelId * inputHeight * inputWidth; @@ -273,7 +300,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -312,6 +341,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth); CHECK_SYNC("Im2ColFunctor GPU failed"); @@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData, int strideWidth, int paddingHeight, int paddingWidth, + int dilationHeight, + int dilationWidth, int outputHeight, int outputWidth) { int swId = blockIdx.x; @@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData, channelId += blockDim.z) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = idx + swId * strideWidth - paddingWidth; - int heightOffset = idy + shId * strideHeight - paddingHeight; + int widthOffset = + idx * dilationWidth + swId * strideWidth - paddingWidth; + int heightOffset = + idy * dilationHeight + shId * strideHeight - paddingHeight; int imOffset = widthOffset + heightOffset * inputWidth + channelId * inputHeight * inputWidth; @@ -372,7 +407,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -411,6 +448,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth); CHECK_SYNC("Col2ImFunctor GPU failed"); diff --git a/paddle/function/Im2ColTest.cpp b/paddle/function/Im2ColTest.cpp index a0a01a5fc7fc0..1f085538d8190 100644 --- a/paddle/function/Im2ColTest.cpp +++ b/paddle/function/Im2ColTest.cpp @@ -29,82 +29,98 @@ void TestIm2ColFunctor() { for (size_t filterWidth : {3, 7}) { for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { - if (inputHeight <= filterHeight || inputWidth <= filterWidth) - break; - if (padding >= filterHeight || padding >= filterWidth) break; - size_t outputHeight = - (inputHeight - filterHeight + 2 * padding + stride) / - stride; - size_t outputWidth = - (inputWidth - filterWidth + 2 * padding + stride) / stride; - - TensorShape imShape = - TensorShape({channels, inputHeight, inputWidth}); - TensorShape colShape1 = TensorShape({channels, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - TensorShape colShape2 = TensorShape({outputHeight, - outputWidth, - channels, - filterHeight, - filterWidth}); - - size_t height = channels * filterHeight * filterWidth; - size_t width = outputHeight * outputWidth; - VectorPtr input1 = Vector::create(imShape.getElements(), false); - VectorPtr input2 = Vector::create(imShape.getElements(), false); - MatrixPtr output1 = Matrix::create(height, width, false, false); - MatrixPtr output2 = Matrix::create(width, height, false, false); - input1->uniform(0.001, 1); - input2->copyFrom(*input1); - - Im2ColFunctor im2Col1; - Im2ColFunctor im2Col2; - im2Col1(input1->getData(), - imShape, - output1->getData(), - colShape1, - stride, - stride, - padding, - padding); - im2Col2(input2->getData(), - imShape, - output2->getData(), - colShape2, - stride, - stride, - padding, - padding); - - // The transposition of the result of ColFormat == kCFO - // is equal to the result of ColFormat == kOCF. - MatrixPtr test; - output2->transpose(test, true); - autotest::TensorCheckErr(*output1, *test); - - Col2ImFunctor col2Im1; - Col2ImFunctor col2Im2; - col2Im1(input1->getData(), - imShape, - output1->getData(), - colShape1, - stride, - stride, - padding, - padding); - col2Im2(input2->getData(), - imShape, - output2->getData(), - colShape2, - stride, - stride, - padding, - padding); - - autotest::TensorCheckErr(*input1, *input2); + for (size_t dilation : {1, 3}) { + size_t filterSizeH = (filterHeight - 1) * dilation + 1; + size_t filterSizeW = (filterWidth - 1) * dilation + 1; + if (inputHeight + 2 * padding < filterSizeH || + inputWidth + 2 * padding < filterSizeW) + break; + if (padding >= filterSizeH || padding >= filterSizeW) break; + size_t outputHeight = + (inputHeight - filterSizeH + 2 * padding) / stride + 1; + size_t outputWidth = + (inputWidth - filterSizeW + 2 * padding) / stride + 1; + + TensorShape imShape = + TensorShape({channels, inputHeight, inputWidth}); + TensorShape colShape1 = TensorShape({channels, + filterHeight, + filterWidth, + outputHeight, + outputWidth}); + TensorShape colShape2 = TensorShape({outputHeight, + outputWidth, + channels, + filterHeight, + filterWidth}); + + size_t height = channels * filterHeight * filterWidth; + size_t width = outputHeight * outputWidth; + VectorPtr input1 = + Vector::create(imShape.getElements(), false); + VectorPtr input2 = + Vector::create(imShape.getElements(), false); + MatrixPtr output1 = + Matrix::create(height, width, false, false); + MatrixPtr output2 = + Matrix::create(width, height, false, false); + input1->uniform(0.001, 1); + input2->copyFrom(*input1); + + Im2ColFunctor im2Col1; + Im2ColFunctor im2Col2; + im2Col1(input1->getData(), + imShape, + output1->getData(), + colShape1, + stride, + stride, + padding, + padding, + dilation, + dilation); + im2Col2(input2->getData(), + imShape, + output2->getData(), + colShape2, + stride, + stride, + padding, + padding, + dilation, + dilation); + + // The transposition of the result of ColFormat == kCFO + // is equal to the result of ColFormat == kOCF. + MatrixPtr test; + output2->transpose(test, true); + autotest::TensorCheckErr(*output1, *test); + + Col2ImFunctor col2Im1; + Col2ImFunctor col2Im2; + + col2Im1(input1->getData(), + imShape, + output1->getData(), + colShape1, + stride, + stride, + padding, + padding, + dilation, + dilation); + col2Im2(input2->getData(), + imShape, + output2->getData(), + colShape2, + stride, + stride, + padding, + padding, + dilation, + dilation); + autotest::TensorCheckErr(*input1, *input2); + } } } } diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 48dfcb49a4c2c..7ff0c73721d3d 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, for (int i = 0; i < config_.inputs_size(); i++) { std::vector paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector strides = {(size_t)strideY_[i], (size_t)stride_[i]}; + std::vector dilations = {(size_t)dilationY_[i], + (size_t)dilation_[i]}; + + bool useDilation = ((size_t)dilationY_[i] > 1 || (size_t)dilation_[i] > 1); // Convolution Layer uses the GemmConv function by default. convType = "GemmConv"; @@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, #if defined(__ARM_NEON__) || defined(__ARM_NEON) if ((filterSize_[i] == filterSizeY_[i]) && (filterSize_[i] == 3 || filterSize_[i] == 4) && - (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) { + (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2) && + !useDilation) { convType = "NeonDepthwiseConv"; } #endif } - if (FLAGS_use_nnpack && !isDeconv_) { + if (FLAGS_use_nnpack && !isDeconv_ && !useDilation) { createFunction(forward_, "NNPACKConv", FuncConfig() @@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, FuncConfig() .set("paddings", paddings) .set("strides", strides) + .set("dilations", dilations) .set("groups", (size_t)groups_[i])); createFunction(backward_, @@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, FuncConfig() .set("paddings", paddings) .set("strides", strides) + .set("dilations", dilations) .set("groups", (size_t)groups_[i])); createFunction(backward_, @@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, FuncConfig() .set("paddings", paddings) .set("strides", strides) + .set("dilations", dilations) .set("groups", (size_t)groups_[i])); } } diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 3afaab630b21e..3517d293e3c90 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - int dilation = 1; + int dilation = 2; if (type == "cudnn_conv") { #if CUDNN_VERSION >= 6000 dilation = 2; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5891b1f0cd657..5bd68e211ac1c 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None): #caffe_mode: compute the output size using floor instead of ceil, # which is consistent of caffe and CuDNN's convention. -def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): - output = (2 * padding + img_size - filter_size) / float(stride) +def cnn_output_size(img_size, + filter_size, + padding, + stride, + caffe_mode, + dilation=1): + filter_s = (filter_size - 1) * dilation + 1 + output = (2 * padding + img_size - filter_s) / float(stride) if caffe_mode: return 1 + int(math.floor(output)) else: @@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): #calcualte image_size based on output_size for de-convolution (ConvTransLayer). #It is the reverse function of cnn_output_size -def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): - img_size = (output_size - 1) * stride + filter_size - 2 * padding +def cnn_image_size(output_size, + filter_size, + padding, + stride, + caffe_mode, + dilation=1): + filter_s = (filter_size - 1) * dilation + 1 + img_size = (output_size - 1) * stride + filter_s - 2 * padding if not caffe_mode: img_size = img_size + 1 return img_size @@ -1376,6 +1388,12 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.stride_y = conv.stride_y conv_conf.groups = conv.groups conv_conf.caffe_mode = conv.caffe_mode + if not conv.dilation: + conv.dilation = 1 + conv.dilation_y = 1 + else: + conv_conf.dilation = conv.dilation + conv_conf.dilation_y = conv.dilation_y if not trans: conv_conf.filter_channels = conv.channels / conv.groups @@ -1383,20 +1401,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): get_img_size(input_layer_name, conv.channels) conv_conf.output_x = cnn_output_size( conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) + conv_conf.stride, conv_conf.caffe_mode, conv.dilation) conv_conf.output_y = cnn_output_size( conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) else: conv_conf.filter_channels = num_filters / conv.groups conv_conf.output_x, conv_conf.output_y = \ get_img_size(input_layer_name, conv.channels) conv_conf.img_size = cnn_image_size( conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) + conv_conf.stride, conv_conf.caffe_mode, conv.dilation) conv_conf.img_size_y = cnn_image_size( conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) #caffe_mode: compute the output size using floor instead of ceil, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index d36b40ff6e7d5..b59f2f657d7cd 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2571,7 +2571,9 @@ def img_conv_layer(input, if layer_type: if dilation > 1 or dilation_y > 1: - assert layer_type in ["cudnn_conv", "cudnn_convt"] + assert layer_type in [ + "cudnn_conv", "cudnn_convt", "exconv", "exconvt" + ] if trans: assert layer_type in ["exconvt", "cudnn_convt"] else: diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr index 5ddf6052df021..b14121e82cb7d 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -28,6 +28,8 @@ layers { stride_y: 1 output_y: 227 img_size_y: 256 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index c0252b945b4c7..c7a487a11231c 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -28,6 +28,8 @@ layers { stride_y: 1 output_y: 227 img_size_y: 256 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr index fd5224ca55cd1..25ec6323751fa 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr @@ -28,6 +28,8 @@ layers { stride_y: 1 output_y: 48 img_size_y: 48 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr index 03f4f3a31d6c2..39dc487146978 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr @@ -30,6 +30,8 @@ layers { stride_y: 1 output_y: 48 img_size_y: 48 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" @@ -105,6 +107,8 @@ layers { stride_y: 1 output_y: 24 img_size_y: 24 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_1__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr index 15c6ab4dc8e61..d5d6d31a17b84 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr @@ -30,6 +30,8 @@ layers { stride_y: 1 output_y: 48 img_size_y: 48 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr index f1bc65b3aee74..0ec88aa998cce 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -36,6 +36,8 @@ layers { stride_y: 1 output_y: 14 img_size_y: 14 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias"