Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DeformableConv2D #4070

Merged
merged 16 commits into from
Jul 28, 2022
Merged
30 changes: 30 additions & 0 deletions docs/developer-guide/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
* [DeconvolutionDepthWise](#deconvolutiondepthwise)
* [DeconvolutionDepthWise1D](#deconvolutiondepthwise1d)
* [DeconvolutionDepthWise3D](#deconvolutiondepthwise3d)
* [DeformableConv2D](#deformableconv2d)
* [Dequantize](#dequantize)
* [Dropout](#dropout)
* [Eltwise](#eltwise)
Expand Down Expand Up @@ -664,6 +665,35 @@ y = activation(x3, act_type, act_params)
| weight_data | float/fp16 | [kernel_w, kernel_h, kernel_d, num_input / group, num_output / group, group] |
| bias_data | float | [num_output] |

# DeformableConv2D
```
x2 = deformableconv2d(x, offset, mask, weight, kernel, stride, dilation) + bias
y = activation(x2, act_type, act_params)
```

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | num_output | int | 0 | |
| 1 | kernel_w | int | 0 | |
| 2 | dilation_w | int | 1 | |
| 3 | stride_w | int | 1 | |
| 4 | pad_left | int | 0 | |
| 5 | bias_term | int | 0 | |
| 6 | weight_data_size| int | 0 | |
| 9 | activation_type| int | 0 | |
| 10 | activation_params| array | [ ] | |
| 11 | kernel_h | int | kernel_w | |
| 12 | dilation_h | int | dilation_w | |
| 13 | stride_h | int | stride_w | |
| 14 | pad_top | int | pad_left | |
| 15 | pad_right | int | pad_left | |
| 16 | pad_bottom | int | pad_top | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
| weight_data | float/fp16/int8 | [kernel_w, kernel_h, num_input, num_output] |
| bias_data | float | [num_output] |

# Dequantize
```
y = x * scale + bias
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ ncnn_add_layer(DeconvolutionDepthWise1D)
ncnn_add_layer(Deconvolution3D)
ncnn_add_layer(DeconvolutionDepthWise3D)
ncnn_add_layer(Einsum)
ncnn_add_layer(DeformableConv2D)

if(NCNN_VULKAN)
ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp)
Expand Down
173 changes: 173 additions & 0 deletions src/layer/deformableconv2d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "deformableconv2d.h"

#include "fused_activation.h"

namespace ncnn {

DeformableConv2D::DeformableConv2D()
{
one_blob_only = false;
support_inplace = false;
}

int DeformableConv2D::load_param(const ParamDict& pd)
{
num_output = pd.get(0, 0);
kernel_w = pd.get(1, 0);
kernel_h = pd.get(11, kernel_w);
dilation_w = pd.get(2, 1);
dilation_h = pd.get(12, dilation_w);
stride_w = pd.get(3, 1);
stride_h = pd.get(13, stride_w);
pad_left = pd.get(4, 0);
pad_right = pd.get(15, pad_left);
pad_top = pd.get(14, pad_left);
pad_bottom = pd.get(16, pad_top);
bias_term = pd.get(5, 0);
weight_data_size = pd.get(6, 0);
activation_type = pd.get(9, 0);
activation_params = pd.get(10, Mat());
return 0;
}

int DeformableConv2D::load_model(const ModelBin& mb)
{
weight_data = mb.load(weight_data_size, 0);
if (weight_data.empty())
return -100;

if (bias_term)
{
bias_data = mb.load(num_output, 1);
if (bias_data.empty())
return -100;
}
return 0;
}

int DeformableConv2D::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
const Mat& bottom_blob = bottom_blobs[0];
const Mat& offset = bottom_blobs[1];

const bool has_mask = (bottom_blobs.size() == 3);

const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int in_c = bottom_blob.c;
const size_t elemsize = bottom_blob.elemsize;

const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;

const int out_w = (w + pad_left + pad_right - kernel_extent_w) / stride_w + 1;
const int out_h = (h + pad_top + pad_bottom - kernel_extent_h) / stride_h + 1;

// output.shape is [num_output, out_h, out_w] (in python).
Mat& output = top_blobs[0];
output.create(out_w, out_h, num_output, elemsize, opt.blob_allocator);
if (output.empty())
return -100;

const float* weight_ptr = weight_data;
const float* bias_ptr = weight_data;
if (bias_term)
bias_ptr = bias_data;

// deformable conv
#pragma omp parallel for num_threads(opt.num_threads)
for (int h_col = 0; h_col < out_h; h_col++)
{
for (int w_col = 0; w_col < out_w; w_col++)
{
int h_in = h_col * stride_h - pad_top;
int w_in = w_col * stride_w - pad_left;
for (int oc = 0; oc < num_output; oc++)
{
float sum = 0.f;
if (bias_term)
sum = bias_ptr[oc];
for (int i = 0; i < kernel_h; i++)
{
for (int j = 0; j < kernel_w; j++)
{
const float offset_h = offset.channel((i * kernel_w + j) * 2).row(h_col)[w_col];
const float offset_w = offset.channel((i * kernel_w + j) * 2 + 1).row(h_col)[w_col];
const float mask_ = has_mask ? bottom_blobs[2].channel(i * kernel_w + j).row(h_col)[w_col] : 1.f;
const float h_im = h_in + i * dilation_h + offset_h;
const float w_im = w_in + j * dilation_w + offset_w;

// Bilinear
const bool cond = h_im > -1 && w_im > -1 && h_im < h && w_im < w;
int h_low = 0;
int w_low = 0;
int h_high = 0;
int w_high = 0;
float w1 = 0.f;
float w2 = 0.f;
float w3 = 0.f;
float w4 = 0.f;
bool v1_cond = false;
bool v2_cond = false;
bool v3_cond = false;
bool v4_cond = false;
if (cond)
{
h_low = floor(h_im);
w_low = floor(w_im);
h_high = h_low + 1;
w_high = w_low + 1;

float lh = h_im - h_low;
float lw = w_im - w_low;
float hh = 1 - lh;
float hw = 1 - lw;

v1_cond = (h_low >= 0 && w_low >= 0);
v2_cond = (h_low >= 0 && w_high <= w - 1);
v3_cond = (h_high <= h - 1 && w_low >= 0);
v4_cond = (h_high <= h - 1 && w_high <= w - 1);

w1 = hh * hw;
w2 = hh * lw;
w3 = lh * hw;
w4 = lh * lw;
}

for (int c_im = 0; c_im < in_c; c_im++)
{
float val = 0.f;
if (cond)
{
float v1 = v1_cond ? bottom_blob.channel(c_im).row(h_low)[w_low] : 0.f;
float v2 = v2_cond ? bottom_blob.channel(c_im).row(h_low)[w_high] : 0.f;
float v3 = v3_cond ? bottom_blob.channel(c_im).row(h_high)[w_low] : 0.f;
float v4 = v4_cond ? bottom_blob.channel(c_im).row(h_high)[w_high] : 0.f;
val = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4;
}
sum += val * mask_ * weight_ptr[((oc * in_c + c_im) * kernel_h + i) * kernel_w + j];
}
}
}
output.channel(oc).row(h_col)[w_col] = activation_ss(sum, activation_type, activation_params);
}
}
}
return 0;
}

} // namespace ncnn
61 changes: 61 additions & 0 deletions src/layer/deformableconv2d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef LAYER_DEFORMABLECONV2D_H
#define LAYER_DEFORMABLECONV2D_H

#include "layer.h"

namespace ncnn {

class DeformableConv2D : public Layer
{
public:
DeformableConv2D();

virtual int load_param(const ParamDict& pd);

virtual int load_model(const ModelBin& mb);

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;

public:
// param
int num_output;
int kernel_w;
int kernel_h;
int dilation_w;
int dilation_h;
int stride_w;
int stride_h;
int pad_left; // -233=SAME_UPPER -234=SAME_LOWER
int pad_right;
int pad_top;
int pad_bottom;
int bias_term;

int weight_data_size;

// 0=none 1=relu 2=leakyrelu 3=clip 4=sigmoid
int activation_type;
Mat activation_params;

// model
Mat weight_data;
Mat bias_data;
};

} // namespace ncnn

#endif // LAYER_DEFORMABLECONV2D_H