implement GLU and pnnx conversion (#4283)
csukuangfj committed Oct 19, 2022
1 parent c62d256 commit 5281d51
Showing 18 changed files with 761 additions and 2 deletions.
17 changes: 17 additions & 0 deletions docs/developer-guide/operators.md
@@ -29,6 +29,7 @@
* [Exp](#exp)
* [Flatten](#flatten)
* [GELU](#gelu)
* [GLU](#glu)
* [Gemm](#gemm)
* [GroupNorm](#groupnorm)
* [GRU](#gru)
@@ -784,6 +785,22 @@ else y = 0.5 * x * erfc(-0.70710678 * x)
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | fast_gelu | int | 0 | use approximation |

# GLU

If axis < 0, it is normalized as axis = x.dims + axis

GLU(a,b)=a⊗σ(b)

where a is the first half of the input matrix and b is the second half.

axis specifies the dimension along which to split the input; a short sketch follows the parameter table below.

* one_blob_only

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | axis | int | 0 | dimension along which to split the input |

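As a rough illustration of the split (a minimal C++ sketch, not the ncnn layer itself): for the 1-D case the input of length 2*n is cut in half, and each output element is a[i] * sigmoid(b[i]).

```
#include <cmath>
#include <vector>

// GLU over a 1-D buffer of length 2*n: the first half is a, the second half is b
std::vector<float> glu_1d(const std::vector<float>& x)
{
    const size_t n = x.size() / 2;
    std::vector<float> y(n);
    for (size_t i = 0; i < n; ++i)
    {
        const float sigmoid_b = 1.f / (1.f + std::exp(-x[i + n]));
        y[i] = x[i] * sigmoid_b;
    }
    return y;
}
```
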
# Gemm
```
a = transA ? transpose(x0) : x0
1 change: 1 addition & 0 deletions src/CMakeLists.txt
@@ -156,6 +156,7 @@ ncnn_add_layer(Deconvolution3D)
ncnn_add_layer(DeconvolutionDepthWise3D)
ncnn_add_layer(Einsum)
ncnn_add_layer(DeformableConv2D)
ncnn_add_layer(GLU)

if(NCNN_VULKAN)
ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp)
220 changes: 220 additions & 0 deletions src/layer/glu.cpp
@@ -0,0 +1,220 @@
// Copyright (c) 2022 Xiaomi Corp. (author: Fangjun Kuang)
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of the
// License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#include "glu.h"

#include <math.h>

namespace ncnn {

GLU::GLU()
{
one_blob_only = true;
support_inplace = false;
}

int GLU::load_param(const ParamDict& pd)
{
axis = pd.get(0, 0);

return 0;
}

int GLU::forward(const Mat& bottom_blob, Mat& top_blob,
const Option& opt) const
{
int dims = bottom_blob.dims;
int positive_axis = axis < 0 ? dims + axis : axis;

if (dims == 1)
{ // ignore axis
int w = bottom_blob.w;
int out_w = w / 2;
top_blob.create(out_w, sizeof(float), opt.blob_allocator);

const float* in_ptr = bottom_blob;
float* out_ptr = top_blob;

#pragma omp parallel for num_threads(opt.num_threads)
for (int x = 0; x < out_w; ++x)
{
float sigmoid = static_cast<float>(1.f / (1.f + expf(-in_ptr[x + out_w])));

out_ptr[x] = in_ptr[x] * sigmoid;
}

return 0;
} // if (dims == 1)

if (dims == 2 && positive_axis == 0)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int out_w = w;
int out_h = h / 2;
top_blob.create(out_w, out_h, sizeof(float), opt.blob_allocator);

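// in the row-major layout the second half (b) starts out_w * out_h floats after the first half (a)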
int offset = out_w * out_h;

#if 0
// this one is equivalent to the else branch. It is more readable
// but less efficient
#pragma omp parallel for num_threads(opt.num_threads)
for (int y = 0; y < out_h; ++y) {
const float *in_ptr = bottom_blob.row(y);
float *out_ptr = top_blob.row(y);

for (int x = 0; x < w; ++x) {
float sigmoid =
static_cast<float>(1.f / (1.f + exp(-in_ptr[x + offset])));

out_ptr[x] = in_ptr[x] * sigmoid;
}
}
#else
int size = offset;
const float* in_ptr = bottom_blob;
float* out_ptr = top_blob;

#pragma omp parallel for num_threads(opt.num_threads)
for (int i = 0; i < size; ++i)
{
float sigmoid = static_cast<float>(1.f / (1.f + exp(-in_ptr[i + offset])));
out_ptr[i] = in_ptr[i] * sigmoid;
}
#endif

return 0;
} // if (dims == 2 && positive_axis == 0)

if (dims == 2 && positive_axis == 1)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int out_w = w / 2;
int out_h = h;

top_blob.create(out_w, out_h, sizeof(float), opt.blob_allocator);

#pragma omp parallel for num_threads(opt.num_threads)
for (int y = 0; y < h; ++y)
{
const float* in_ptr = bottom_blob.row(y);
float* out_ptr = top_blob.row(y);

for (int x = 0; x < out_w; ++x)
{
float sigmoid = static_cast<float>(1.f / (1.f + exp(-in_ptr[x + out_w])));
out_ptr[x] = in_ptr[x] * sigmoid;
}
}

return 0;
} // if (dims == 2 && positive_axis == 1)

if (dims == 3 && positive_axis == 0)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int c = bottom_blob.c;

int out_w = w;
int out_h = h;
int out_c = c / 2;

top_blob.create(out_w, out_h, out_c, sizeof(float), opt.blob_allocator);

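// offset jumps from channel q to channel q + out_c (the matching element of the second half); cstep is the per-channel stride in floats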
int offset = out_c * bottom_blob.cstep;
int size = w * h;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < out_c; ++q)
{
const float* in_ptr = bottom_blob.channel(q);
float* out_ptr = top_blob.channel(q);

for (int i = 0; i < size; ++i)
{
float sigmoid = static_cast<float>(1.f / (1.f + exp(-in_ptr[i + offset])));
out_ptr[i] = in_ptr[i] * sigmoid;
}
}
return 0;
} // if (dims == 3 && positive_axis == 0)

if (dims == 3 && positive_axis == 1)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int c = bottom_blob.c;

int out_w = w;
int out_h = h / 2;
int out_c = c;

top_blob.create(out_w, out_h, out_c, sizeof(float), opt.blob_allocator);

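// rows are contiguous within a channel, so the second half of the h dimension starts out_h * out_w floats into each channel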
int offset = out_h * out_w;
int size = offset;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < c; ++q)
{
const float* in_ptr = bottom_blob.channel(q);
float* out_ptr = top_blob.channel(q);

for (int i = 0; i < size; ++i)
{
float sigmoid = static_cast<float>(1.f / (1.f + exp(-in_ptr[i + offset])));
out_ptr[i] = in_ptr[i] * sigmoid;
}
}
return 0;
} // if (dims == 3 && positive_axis == 1)

if (dims == 3 && positive_axis == 2)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int c = bottom_blob.c;

int out_w = w / 2;
int out_h = h;
int out_c = c;

top_blob.create(out_w, out_h, out_c, sizeof(float), opt.blob_allocator);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < c; ++q)
{
const float* in_ptr = bottom_blob.channel(q);
float* out_ptr = top_blob.channel(q);
for (int y = 0; y < h; ++y)
{
for (int x = 0; x < out_w; ++x)
{
float sigmoid = static_cast<float>(1.f / (1.f + exp(-in_ptr[x + out_w])));
out_ptr[x] = in_ptr[x] * sigmoid;
}
in_ptr += w;
out_ptr += out_w;
}
}
return 0;
} // if (dims == 3 && positive_axis == 2)

return -100;
}

} // namespace ncnn
38 changes: 38 additions & 0 deletions src/layer/glu.h
@@ -0,0 +1,38 @@
// Copyright (c) 2022 Xiaomi Corp. (author: Fangjun Kuang)
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of the
// License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#ifndef LAYER_GLU_H
#define LAYER_GLU_H

#include "layer.h"

namespace ncnn {

class GLU : public Layer
{
public:
GLU();

virtual int load_param(const ParamDict& pd);

virtual int forward(const Mat& bottom_blob, Mat& top_blob,
const Option& opt) const;

public:
int axis;
};

} // namespace ncnn

#endif // LAYER_GLU_H
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
@@ -86,6 +86,7 @@ ncnn_add_layer_test(ELU)
ncnn_add_layer_test(ExpandDims)
ncnn_add_layer_test(Flatten)
ncnn_add_layer_test(GELU)
ncnn_add_layer_test(GLU)
ncnn_add_layer_test(Gemm)
ncnn_add_layer_test(GroupNorm)
ncnn_add_layer_test(GRU)
69 changes: 69 additions & 0 deletions tests/test_glu.cpp
@@ -0,0 +1,69 @@
// Copyright (c) 2022 Xiaomi Corp. (author: Fangjun Kuang)
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "layer/glu.h"
#include "testutil.h"

static int test_glu(const ncnn::Mat& a, int axis)
{
ncnn::ParamDict pd;
pd.set(0, axis);

std::vector<ncnn::Mat> weights(0);

int ret = test_layer<ncnn::GLU>("GLU", pd, weights, a);
if (ret != 0)
{
fprintf(stderr, "test_glu failed a.dims=%d a=(%d %d %d) axis=%d\n", a.dims, a.w, a.h, a.c, axis);
}

return ret;
}

static int test_glu_0()
{
return 0
|| test_glu(RandomMat(6, 7, 24), 0)
|| test_glu(RandomMat(6, 8, 24), 1)
|| test_glu(RandomMat(6, 8, 24), 2)
|| test_glu(RandomMat(36, 7, 22), 0)
|| test_glu(RandomMat(5, 256, 23), -2)
|| test_glu(RandomMat(129, 9, 60), 2)
|| test_glu(RandomMat(129, 9, 30), -1);
}

static int test_glu_1()
{
return 0
|| test_glu(RandomMat(10, 24), 0)
|| test_glu(RandomMat(7, 24), 1)
|| test_glu(RandomMat(128, 22), 0)
|| test_glu(RandomMat(128, 256), 1);
}

static int test_glu_2()
{
return 0
|| test_glu(RandomMat(10), 0)
|| test_glu(RandomMat(20), 0)
|| test_glu(RandomMat(128), 0);
}

int main()
{
SRAND(7767517);

return 0
|| test_glu_0()
|| test_glu_1()
|| test_glu_2();
}
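For reference, a minimal standalone sketch of driving the new layer outside the test harness, assuming the stock ncnn API (create_layer, ParamDict, Option) and that the GLU layer is registered in the build:

```
#include "layer.h" // ncnn::Layer and create_layer; pulls in Mat, Option, ParamDict

// Sketch only: apply GLU along axis 0 to a 1-D blob of length 10
static int run_glu_example()
{
    ncnn::Layer* glu = ncnn::create_layer("GLU");
    if (!glu)
        return -1; // layer not registered in this build

    ncnn::ParamDict pd;
    pd.set(0, 0); // param 0 = axis

    glu->load_param(pd);

    ncnn::Option opt;
    glu->create_pipeline(opt);

    ncnn::Mat in(10);
    in.fill(1.f);

    ncnn::Mat out;
    int ret = glu->forward(in, out, opt); // out has 5 elements, each 1 * sigmoid(1)

    glu->destroy_pipeline(opt);
    delete glu;
    return ret;
}
```
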
3 changes: 2 additions & 1 deletion tools/pnnx/README.md
@@ -488,6 +488,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) {
|nn.FractionalMaxPool2d | |
|nn.FractionalMaxPool3d | |
|nn.GELU | :heavy_check_mark: | :heavy_check_mark: |
|nn.GLU | :heavy_check_mark: | :heavy_check_mark: |
|nn.GroupNorm | :heavy_check_mark: | :heavy_check_mark: |
|nn.GRU | :heavy_check_mark: | :heavy_check_mark: |
|nn.GRUCell | |
@@ -603,7 +604,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) {
|F.fractional_max_pool2d | |
|F.fractional_max_pool3d | |
|F.gelu | :heavy_check_mark: | :heavy_check_mark: |
|F.glu | |
|F.glu | :heavy_check_mark: | :heavy_check_mark: |
|F.grid_sample | :heavy_check_mark: |
|F.group_norm | :heavy_check_mark: | :heavy_check_mark: |
|F.gumbel_softmax | |