Add Support for Dilated Convolution #3452

Closed · wants to merge 4 commits
22 changes: 14 additions & 8 deletions include/caffe/layers/base_conv_layer.hpp
@@ -68,6 +68,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
+ /// @brief The spatial dimensions of the dilation.
+ Blob<int> dilation_;
/// @brief The spatial dimensions of the convolution input.
Blob<int> conv_input_shape_;
/// @brief The spatial dimensions of the col_buffer.
@@ -99,11 +101,12 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
} else {
im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
- pad_.cpu_data(), stride_.cpu_data(), col_buff);
+ pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), col_buff);
}
}
inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
@@ -112,11 +115,12 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], data);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
} else {
col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
- pad_.cpu_data(), stride_.cpu_data(), data);
+ pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), data);
}
}
#ifndef CPU_ONLY
@@ -126,12 +130,13 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
} else {
im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(),
- stride_.gpu_data(), col_buff);
+ stride_.gpu_data(), dilation_.gpu_data(), col_buff);
}
}
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
@@ -140,12 +145,13 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], data);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
} else {
col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
- data);
+ dilation_.gpu_data(), data);
}
}
#endif
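The substance of the change above is in the im2col indexing: with dilation, kernel tap (kh, kw) samples the input at row h_out * stride_h - pad_h + kh * dilation_h, so the filter covers a larger receptive field with the same number of weights. A minimal single-channel sketch of what the updated im2col computes (illustration only, with hypothetical names; the real implementation is im2col_cpu in src/caffe/util/im2col.cpp):

    #include <vector>

    // Single-channel dilated im2col: one column entry per (kernel tap,
    // output position) pair, zero-filled where the dilated tap lands in
    // the padding.
    void im2col_dilated_1ch(const float* im, int height, int width,
                            int kernel_h, int kernel_w, int pad_h, int pad_w,
                            int stride_h, int stride_w,
                            int dilation_h, int dilation_w,
                            std::vector<float>* col) {
      // A k-tap kernel with dilation d spans an extent of d * (k - 1) + 1.
      const int ext_h = dilation_h * (kernel_h - 1) + 1;
      const int ext_w = dilation_w * (kernel_w - 1) + 1;
      const int out_h = (height + 2 * pad_h - ext_h) / stride_h + 1;
      const int out_w = (width + 2 * pad_w - ext_w) / stride_w + 1;
      col->assign(static_cast<size_t>(kernel_h) * kernel_w * out_h * out_w, 0.f);
      for (int kh = 0; kh < kernel_h; ++kh) {
        for (int kw = 0; kw < kernel_w; ++kw) {
          for (int oh = 0; oh < out_h; ++oh) {
            for (int ow = 0; ow < out_w; ++ow) {
              // The only difference from plain im2col: taps step by
              // dilation_h / dilation_w pixels instead of 1.
              const int ih = oh * stride_h - pad_h + kh * dilation_h;
              const int iw = ow * stride_w - pad_w + kw * dilation_w;
              if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
                (*col)[((static_cast<size_t>(kh) * kernel_w + kw) * out_h + oh)
                           * out_w + ow] = im[ih * width + iw];
              }
            }
          }
        }
      }
    }

With dilation_h == dilation_w == 1 this reduces exactly to the original im2col, which is why the new arguments default to 1 throughout the patch.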
3 changes: 3 additions & 0 deletions include/caffe/layers/conv_layer.hpp
@@ -44,6 +44,9 @@ class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
* convolution, given by pad for equal dimensions or pad_h and pad_w for
* different padding. Input padding is computed implicitly instead of
* actually padding.
+ * - dilation (\b optional, default 1). The filter
+ * dilation, given by dilation for equal dilation in all spatial
+ * dimensions, or once per spatial dimension. By default the
+ * convolution has dilation 1.
* - group (\b optional, default 1). The number of filter groups. Group
* convolution is a method for reducing parameterization by selectively
* connecting input and output channels. The input and output channel dimensions must be divisible
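The new field is used like the existing repeated kernel_size/pad/stride parameters: give dilation once to apply it to every spatial axis, or once per axis. A hedged sketch through the generated protobuf API (MakeDilatedConvParam is a hypothetical helper; the repeated dilation field is the one this PR adds to caffe.proto):

    #include "caffe/proto/caffe.pb.h"

    // A 3x3 convolution with dilation 2: the kernel extent grows to 5, so
    // pad 2 keeps the output the same size as the input at stride 1.
    caffe::LayerParameter MakeDilatedConvParam() {
      caffe::LayerParameter param;
      param.set_name("conv_dilated");
      param.set_type("Convolution");
      caffe::ConvolutionParameter* conv = param.mutable_convolution_param();
      conv->set_num_output(64);
      conv->add_kernel_size(3);  // one value -> broadcast to all spatial axes
      conv->add_dilation(2);     // likewise; or add one value per axis
      conv->add_pad(2);
      return param;
    }

As the layer_factory.cpp hunk below shows, leaving engine at DEFAULT makes such a layer fall back to the native CAFFE engine, because cuDNN at the time of this PR had no dilated convolution; forcing the CUDNN engine with dilation > 1 is a fatal error.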
2 changes: 2 additions & 0 deletions include/caffe/layers/im2col_layer.hpp
@@ -46,6 +46,8 @@ class Im2colLayer : public Layer<Dtype> {
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
+ /// @brief The spatial dimensions of the dilation.
+ Blob<int> dilation_;

int num_spatial_axes_;
int bottom_dim_;
20 changes: 12 additions & 8 deletions include/caffe/util/im2col.hpp
@@ -7,49 +7,53 @@ template <typename Dtype>
void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col);
+ const int* dilation, Dtype* data_col);

template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_col);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_col);

template <typename Dtype>
void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_im);
+ const int* dilation, Dtype* data_im);

template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_im);

template <typename Dtype>
void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes,
const int col_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col);
+ const int* dilation, Dtype* data_col);

template <typename Dtype>
void im2col_gpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_col);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_col);

template <typename Dtype>
void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes,
const int im_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_im);
+ const int* dilation, Dtype* data_im);

template <typename Dtype>
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_im);

} // namespace caffe

19 changes: 17 additions & 2 deletions src/caffe/layer_factory.cpp
@@ -37,17 +37,32 @@ namespace caffe {
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetConvolutionLayer(
const LayerParameter& param) {
- ConvolutionParameter_Engine engine = param.convolution_param().engine();
+ ConvolutionParameter conv_param = param.convolution_param();
+ ConvolutionParameter_Engine engine = conv_param.engine();
+ #ifdef USE_CUDNN
+ bool use_dilation = false;
+ for (int i = 0; i < conv_param.dilation_size(); ++i) {
+   if (conv_param.dilation(i) > 1) {
+     use_dilation = true;
+   }
+ }
+ #endif
if (engine == ConvolutionParameter_Engine_DEFAULT) {
engine = ConvolutionParameter_Engine_CAFFE;
#ifdef USE_CUDNN
- engine = ConvolutionParameter_Engine_CUDNN;
+ if (!use_dilation) {
+   engine = ConvolutionParameter_Engine_CUDNN;
+ }
#endif
}
if (engine == ConvolutionParameter_Engine_CAFFE) {
return shared_ptr<Layer<Dtype> >(new ConvolutionLayer<Dtype>(param));
#ifdef USE_CUDNN
} else if (engine == ConvolutionParameter_Engine_CUDNN) {
+ if (use_dilation) {
+   LOG(FATAL) << "CuDNN doesn't support dilated convolution at layer "
+       << param.name();
+ }
return shared_ptr<Layer<Dtype> >(new CuDNNConvolutionLayer<Dtype>(param));
#endif
} else {
35 changes: 26 additions & 9 deletions src/caffe/layers/base_conv_layer.cpp
@@ -28,15 +28,15 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
CHECK_EQ(num_spatial_axes_, 2)
<< "kernel_h & kernel_w can only be used for 2D convolution.";
CHECK_EQ(0, conv_param.kernel_size_size())
<< "Either kernel_size or kernel_h/w should be specified; not both.";
<< "Either kernel_size or kernel_h/w should be specified, not both.";
kernel_shape_data[0] = conv_param.kernel_h();
kernel_shape_data[1] = conv_param.kernel_w();
} else {
const int num_kernel_dims = conv_param.kernel_size_size();
CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_)
<< "kernel_size must be specified once, or once per spatial dimension "
<< "(kernel_size specified " << num_kernel_dims << " times; "
<< num_spatial_axes_ << " spatial dims);";
<< "(kernel_size specified " << num_kernel_dims << " times "
<< num_spatial_axes_ << " spatial dims).";
for (int i = 0; i < num_spatial_axes_; ++i) {
kernel_shape_data[i] =
conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i);
@@ -52,16 +52,16 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
CHECK_EQ(num_spatial_axes_, 2)
<< "stride_h & stride_w can only be used for 2D convolution.";
CHECK_EQ(0, conv_param.stride_size())
<< "Either stride or stride_h/w should be specified; not both.";
<< "Either stride or stride_h/w should be specified, not both.";
stride_data[0] = conv_param.stride_h();
stride_data[1] = conv_param.stride_w();
} else {
const int num_stride_dims = conv_param.stride_size();
CHECK(num_stride_dims == 0 || num_stride_dims == 1 ||
num_stride_dims == num_spatial_axes_)
<< "stride must be specified once, or once per spatial dimension "
<< "(stride specified " << num_stride_dims << " times; "
<< num_spatial_axes_ << " spatial dims);";
<< "(stride specified " << num_stride_dims << " times "
<< num_spatial_axes_ << " spatial dims).";
const int kDefaultStride = 1;
for (int i = 0; i < num_spatial_axes_; ++i) {
stride_data[i] = (num_stride_dims == 0) ? kDefaultStride :
@@ -76,22 +76,39 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
CHECK_EQ(num_spatial_axes_, 2)
<< "pad_h & pad_w can only be used for 2D convolution.";
CHECK_EQ(0, conv_param.pad_size())
<< "Either pad or pad_h/w should be specified; not both.";
<< "Either pad or pad_h/w should be specified, not both.";
pad_data[0] = conv_param.pad_h();
pad_data[1] = conv_param.pad_w();
} else {
const int num_pad_dims = conv_param.pad_size();
CHECK(num_pad_dims == 0 || num_pad_dims == 1 ||
num_pad_dims == num_spatial_axes_)
<< "pad must be specified once, or once per spatial dimension "
<< "(pad specified " << num_pad_dims << " times; "
<< num_spatial_axes_ << " spatial dims);";
<< "(pad specified " << num_pad_dims << " times "
<< num_spatial_axes_ << " spatial dims)";
const int kDefaultPad = 0;
for (int i = 0; i < num_spatial_axes_; ++i) {
pad_data[i] = (num_pad_dims == 0) ? kDefaultPad :
conv_param.pad((num_pad_dims == 1) ? 0 : i);
}
}
+ // Setup dilation dimensions (dilation_).
+ dilation_.Reshape(spatial_dim_blob_shape);
+ int* dilation_data = dilation_.mutable_cpu_data();
+ const int num_dilation_dims = conv_param.dilation_size();
+ CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
+     num_dilation_dims == num_spatial_axes_)
+     << "dilation must be specified once, or once per spatial dimension "
+     << "(dilation specified " << num_dilation_dims << " times "
+     << num_spatial_axes_ << " spatial dims).";
+ const int kDefaultDilation = 1;
+ for (int i = 0; i < num_spatial_axes_; ++i) {
+   dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
+       conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
+   if (reverse_dimensions()) {
+     CHECK_EQ(dilation_data[i], 1) << "Deconvolution doesn't support dilation";
+   }
+ }
// Special case: im2col is the identity for 1x1 convolution with stride 1
// and no padding, so flag for skipping the buffer and transformation.
is_1x1_ = true;
4 changes: 3 additions & 1 deletion src/caffe/layers/conv_layer.cpp
@@ -9,11 +9,13 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
const int* kernel_shape_data = this->kernel_shape_.cpu_data();
const int* stride_data = this->stride_.cpu_data();
const int* pad_data = this->pad_.cpu_data();
+ const int* dilation_data = this->dilation_.cpu_data();
this->output_shape_.clear();
for (int i = 0; i < this->num_spatial_axes_; ++i) {
// i + 1 to skip channel axis
const int input_dim = this->input_shape(i + 1);
- const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
+ const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
+ const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
/ stride_data[i] + 1;
this->output_shape_.push_back(output_dim);
}
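As a quick check of the kernel_extent formula: a 3-tap kernel with dilation 2 has extent 2 * (3 - 1) + 1 = 5, so a 7-pixel axis with pad 0 and stride 1 gives output_dim = (7 + 0 - 5) / 1 + 1 = 3, while pad 2 gives (7 + 4 - 5) / 1 + 1 = 7 and preserves the spatial size. With dilation 1 the extent is just kernel_shape_data[i] and the expression reduces to the previous one.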
25 changes: 22 additions & 3 deletions src/caffe/layers/im2col_layer.cpp
@@ -87,6 +87,20 @@ void Im2colLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
conv_param.pad((num_pad_dims == 1) ? 0 : i);
}
}
+ // Setup dilation dimensions (dilation_).
+ dilation_.Reshape(dim_blob_shape);
+ int* dilation_data = dilation_.mutable_cpu_data();
+ const int num_dilation_dims = conv_param.dilation_size();
+ CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
+     num_dilation_dims == num_spatial_axes_)
+     << "dilation must be specified once, or once per spatial dimension "
+     << "(dilation specified " << num_dilation_dims << " times "
+     << num_spatial_axes_ << " spatial dims).";
+ const int kDefaultDilation = 1;
+ for (int i = 0; i < num_spatial_axes_; ++i) {
+   dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
+       conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
+ }
}

template <typename Dtype>
@@ -96,10 +110,12 @@ void Im2colLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const int* kernel_shape_data = kernel_shape_.cpu_data();
const int* stride_data = stride_.cpu_data();
const int* pad_data = pad_.cpu_data();
+ const int* dilation_data = dilation_.cpu_data();
for (int i = 0; i < num_spatial_axes_; ++i) {
top_shape[channel_axis_] *= kernel_shape_data[i];
const int input_dim = bottom[0]->shape(channel_axis_ + i + 1);
- const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
+ const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
+ const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
/ stride_data[i] + 1;
top_shape[channel_axis_ + i + 1] = output_dim;
}
@@ -122,20 +138,22 @@ void Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_);
DCHECK_EQ(pad_.count(), num_spatial_axes_);
DCHECK_EQ(stride_.count(), num_spatial_axes_);
+ DCHECK_EQ(dilation_.count(), num_spatial_axes_);
if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
im2col_cpu(bottom_data + n * bottom_dim_, channels_,
bottom[0]->shape(channel_axis_ + 1),
bottom[0]->shape(channel_axis_ + 2),
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
top_data + n * top_dim_);
} else {
im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
bottom[0]->shape().data() + channel_axis_,
top[0]->shape().data() + channel_axis_,
kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
- top_data + n * top_dim_);
+ dilation_.cpu_data(), top_data + n * top_dim_);
}
}
}
@@ -153,13 +171,14 @@ void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
bottom_diff + n * bottom_dim_);
} else {
col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_,
bottom[0]->shape().data() + channel_axis_,
top[0]->shape().data() + channel_axis_,
kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
- bottom_diff + n * bottom_dim_);
+ dilation_.cpu_data(), bottom_diff + n * bottom_dim_);
}
}
}
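col2im, used in Backward above, is the adjoint of im2col: the index arithmetic is identical, but values are accumulated into the image, since overlapping dilated kernel extents can map several column entries to the same input pixel. A companion sketch under the same assumptions as the im2col example earlier (hypothetical name, not Caffe's col2im_cpu):

    #include <algorithm>
    #include <vector>

    // Single-channel dilated col2im: scatter-add each column entry back to
    // the dilated input location it was sampled from.
    void col2im_dilated_1ch(const std::vector<float>& col,
                            int height, int width,
                            int kernel_h, int kernel_w, int pad_h, int pad_w,
                            int stride_h, int stride_w,
                            int dilation_h, int dilation_w, float* im) {
      const int ext_h = dilation_h * (kernel_h - 1) + 1;
      const int ext_w = dilation_w * (kernel_w - 1) + 1;
      const int out_h = (height + 2 * pad_h - ext_h) / stride_h + 1;
      const int out_w = (width + 2 * pad_w - ext_w) / stride_w + 1;
      std::fill(im, im + height * width, 0.f);
      for (int kh = 0; kh < kernel_h; ++kh) {
        for (int kw = 0; kw < kernel_w; ++kw) {
          for (int oh = 0; oh < out_h; ++oh) {
            for (int ow = 0; ow < out_w; ++ow) {
              const int ih = oh * stride_h - pad_h + kh * dilation_h;
              const int iw = ow * stride_w - pad_w + kw * dilation_w;
              if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
                im[ih * width + iw] +=
                    col[((static_cast<size_t>(kh) * kernel_w + kw) * out_h + oh)
                            * out_w + ow];
              }
            }
          }
        }
      }
    }

The accumulation (+= rather than assignment) is what makes this the correct gradient of im2col when receptive fields overlap.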
6 changes: 4 additions & 2 deletions src/caffe/layers/im2col_layer.cu
@@ -19,13 +19,14 @@ void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
top_data + n * top_dim_);
} else {
im2col_nd_gpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
num_kernels, bottom[0]->gpu_shape() + channel_axis_,
top[0]->gpu_shape() + channel_axis_,
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
- top_data + n * top_dim_);
+ dilation_.gpu_data(), top_data + n * top_dim_);
}
}
}
@@ -43,13 +44,14 @@ void Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
bottom_diff + n * bottom_dim_);
} else {
col2im_nd_gpu(top_diff + n * top_dim_, num_spatial_axes_, bottom_dim_,
bottom[0]->gpu_shape() + channel_axis_,
top[0]->gpu_shape() + channel_axis_,
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
- bottom_diff + n * bottom_dim_);
+ dilation_.gpu_data(), bottom_diff + n * bottom_dim_);
}
}
}