Skip to content

Commit

Permalink
Merge pull request #5165 from NHZlX/add_dilation
Browse files Browse the repository at this point in the history
Add dilation for exconv layer
  • Loading branch information
NHZlX committed Nov 14, 2017
2 parents 3e6f768 + f3818bd commit d7319c2
Show file tree
Hide file tree
Showing 17 changed files with 299 additions and 157 deletions.
6 changes: 6 additions & 0 deletions paddle/function/ConvOp.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class ConvFunctionBase : public FunctionBase {
// function arguments
strides_ = config.get<std::vector<size_t>>("strides");
paddings_ = config.get<std::vector<size_t>>("paddings");
dilations_ = config.get<std::vector<size_t>>("dilations");
groups_ = config.get<size_t>("groups");

// number of inputs and outputs
Expand Down Expand Up @@ -118,6 +119,7 @@ class ConvFunctionBase : public FunctionBase {

std::vector<size_t> strides_;
std::vector<size_t> paddings_;
std::vector<size_t> dilations_;

/// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the
Expand All @@ -133,6 +135,10 @@ class ConvFunctionBase : public FunctionBase {

inline int paddingW() const { return paddings_[1]; }

inline int dilationH() const { return dilations_[0]; }

inline int dilationW() const { return dilations_[1]; }

// A temporary memory in convolution calculation.
MemoryHandlePtr memory_;

Expand Down
87 changes: 53 additions & 34 deletions paddle/function/ConvOpTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,45 +79,59 @@ void Convolution(const std::string& conv1,
if (outputChannels < inputChannels) continue;
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
for (size_t dilation : {1, 3}) {
if (padding >= filterSize) break;
size_t filterS = (filterSize - 1) * dilation + 1;

// NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;
if (inputSize + 2 * padding < filterS) break;

size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride
<< " padding=" << padding;
if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" ||
conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
dilation > 1)
break;

std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
// NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;

TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
size_t outputSize =
(inputSize - filterS + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize
<< " stride=" << stride << " padding=" << padding;

function(test, input, filter, output);
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));

TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};

function(test, input, filter, output);
}
}
}
}
Expand All @@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1,
for (size_t outputChannels : {7}) {
size_t stride = 1;
size_t padding = 0;
size_t dilation = 1;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
Expand All @@ -162,13 +177,15 @@ void Convolution2(const std::string& conv1,

std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("dilations", dilations)
.set("algo", (std::string) "auto"));

TensorShape input{
Expand Down Expand Up @@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1,

std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {1, 1};
size_t groups = inputChannels;
Compare2Function<DType1, DType2> test(
conv1,
Expand All @@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1,
.set("paddings", paddings)
.set("strides", strides)
.set("groups", groups)
.set("dilations", dilations)
.set("algo", (std::string) "auto"));

TensorShape input{
Expand Down
12 changes: 9 additions & 3 deletions paddle/function/GemmConvOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ class GemmConvFunction : public ConvFunctionBase {
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
} else {
colData = inputData + g * inputOffset;
}
Expand Down Expand Up @@ -223,7 +225,9 @@ class GemmConvGradInputFunction : public ConvFunctionBase {
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
}
}
inputGrad += inputChannels * inputHeight * inputWidth;
Expand Down Expand Up @@ -310,7 +314,9 @@ class GemmConvGradFilterFunction : public ConvFunctionBase {
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
} else {
colData = inputData + g * inputOffset;
}
Expand Down
8 changes: 6 additions & 2 deletions paddle/function/Im2Col.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ class Im2ColFunctor {
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth);
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
};

template <ColFormat Format, DeviceType Device, class T>
Expand All @@ -91,7 +93,9 @@ class Col2ImFunctor {
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth);
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
};

} // namespace paddle
38 changes: 24 additions & 14 deletions paddle/function/Im2ColOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ class Im2ColFunctor<kCFO, DEVICE_TYPE_CPU, T> {
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
Expand All @@ -47,8 +49,8 @@ class Im2ColFunctor<kCFO, DEVICE_TYPE_CPU, T> {
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) < 0 ||
(imRowIdx - paddingHeight) >= inputHeight ||
(imColIdx - paddingWidth) < 0 ||
Expand Down Expand Up @@ -81,7 +83,9 @@ class Col2ImFunctor<kCFO, DEVICE_TYPE_CPU, T> {
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
Expand All @@ -97,8 +101,8 @@ class Col2ImFunctor<kCFO, DEVICE_TYPE_CPU, T> {
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) >= 0 &&
(imRowIdx - paddingHeight) < inputHeight &&
(imColIdx - paddingWidth) >= 0 &&
Expand Down Expand Up @@ -134,7 +138,9 @@ class Im2ColFunctor<kOCF, DEVICE_TYPE_CPU, T> {
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
Expand All @@ -147,9 +153,10 @@ class Im2ColFunctor<kOCF, DEVICE_TYPE_CPU, T> {
for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset =
outputH * strideHeight + filterH - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth;
int imRowOffset = outputH * strideHeight +
filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels +
channel) *
Expand Down Expand Up @@ -189,7 +196,9 @@ class Col2ImFunctor<kOCF, DEVICE_TYPE_CPU, T> {
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
Expand All @@ -202,9 +211,10 @@ class Col2ImFunctor<kOCF, DEVICE_TYPE_CPU, T> {
for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset =
outputH * strideHeight + filterH - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth;
int imRowOffset = outputH * strideHeight +
filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels +
channel) *
Expand Down
Loading

0 comments on commit d7319c2

Please sign in to comment.