Dev transformer #1723

Open
wants to merge 28 commits into master
Changes from all commits
47d318f
Add README_EVA.md documentation file
May 12, 2022
28610c0
Fix the ONNX2TNN conversion failure caused by the GEMM operator
May 12, 2022
90e90c5
Fix the 2TNN misalignment issue caused by the Selu operator's default value
May 12, 2022
62cff44
Add support for Transformer task requirements; related changes have been posted to iwiki https://iwiki.woa.com/pages/view…
Jul 6, 2022
16e2fae
return error when mat.data_ == nullptr (#1733)
bluaxe Jul 19, 2022
4e00824
[CUDA][ADD] add logsoftmax operator & unittest (#1728)
sjfeng1999 Jul 19, 2022
09c73e1
[UPD]fix coreml error; support swish and groupnorm op (#1738)
darrenyao87 Jul 23, 2022
dc81d63
Dev groupnorm (#1739)
quinnrong94 Jul 26, 2022
17fb9dc
Delete README_EVA file.
bluaxe Aug 2, 2022
3d1d70c
refine PR 1723 comments
bluaxe Aug 2, 2022
09ec860
fix onnx converter
bluaxe Aug 2, 2022
d2819e9
refine comments for PR 1723
bluaxe Aug 2, 2022
21528dc
bugfix: in CPU mode, the layer_res->element_shape value was not passed correctly, so binary operators used only the first element in the computation
Aug 2, 2022
3e88ced
Add support for Transformer task requirements; related changes have been posted to iwiki https://iwiki.woa.com/pages/view…
Jul 6, 2022
1283bfe
[UPD]fix coreml error; support swish and groupnorm op (#1738)
darrenyao87 Jul 23, 2022
456b51e
Dev groupnorm (#1739)
quinnrong94 Jul 26, 2022
8aa24e4
refine PR 1723 comments
bluaxe Aug 2, 2022
2f67e21
fix onnx converter
bluaxe Aug 2, 2022
1ecee3e
refine comments for PR 1723
bluaxe Aug 2, 2022
729b1b8
Implement INT32 computation support via the Optimizer mechanism
Aug 29, 2022
6acbd67
merge with master
Aug 29, 2022
0ad3828
merge with master part 2
Aug 29, 2022
11fdfb0
Merge branch 'master' into dev_transformer
gttiankai Sep 21, 2022
cc195da
Merge branch 'master' into dev_transformer
bluaxe Dec 7, 2022
abc85ac
Merge branch 'master' into dev_transformer
bluaxe Jan 4, 2023
e08c15e
Merge branch 'master' into dev_transformer
bluaxe Jan 17, 2023
1ed16fd
Merge branch 'master' into dev_transformer
bluaxe Feb 3, 2023
7839397
Merge branch 'master' into dev_transformer
gttiankai Feb 13, 2023
1 change: 1 addition & 0 deletions platforms/ios/tnn.xcodeproj/project.pbxproj
@@ -4832,6 +4832,7 @@
9DAD98952886B3A700170339 /* metal_batch_norm_layer_acc.mm in Sources */,
9DAD94462886B1D400170339 /* scatter_layer.cc in Sources */,
9DAD956A2886B22300170339 /* CONV_DW_3X3S1_INT8_SDOT_SLIDEW.S in Sources */,
369005C5267314D900412264 /* pad_utils.cc in Sources */,
9DAD97842886B39600170339 /* metal_mat_converter.metal in Sources */,
360F96A62966A0B800E3695A /* graph_matcher.cc in Sources */,
9D32FCBE24557EEC002DCDAB /* cpu_utils.cc in Sources */,
2 changes: 1 addition & 1 deletion source/tnn/core/default_network.cc
@@ -573,7 +573,7 @@ Status DefaultNetwork::Forward() {
std::vector<Blob *> outputs = layer->GetOutputBlobs();

{

#if DUMP_INPUT_BLOB
if (runtime_model_ == RUNTIME_MODE_NORMAL) {
// InputBlob data is dumped into files in NCHW_FLOAT format as default
72 changes: 61 additions & 11 deletions source/tnn/device/arm/acc/arm_binary_layer_acc.cc
@@ -108,11 +108,47 @@ Float4 binary_op<ArmBinaryOpType::kHARDSWISH, Float4>(const Float4 &a, const Flo
return a * Float4::max(Float4::min(b * alpha + beta, 1.0f), 0.f);
}

template <>
float binary_op<ArmBinaryOpType::kEQUAL, float>(const float &a, const float &b, float alpha, float beta) {
return a == b ? 1.0f : 0.f;
}
template <>
bfp16_t binary_op<ArmBinaryOpType::kEQUAL, bfp16_t>(const bfp16_t &a, const bfp16_t &b, float alpha, float beta) {
return static_cast<float>(a) == static_cast<float>(b) ? 1.0f : 0.f;
}
template <>
float binary_op<ArmBinaryOpType::kGREATER, float>(const float &a, const float &b, float alpha, float beta) {
return a > b ? 1.0f : 0.f;
}
template <>
bfp16_t binary_op<ArmBinaryOpType::kGREATER, bfp16_t>(const bfp16_t &a, const bfp16_t &b, float alpha, float beta) {
return static_cast<float>(a) > static_cast<float>(b) ? 1.0f : 0.f;
}
template <>
Float4 binary_op<ArmBinaryOpType::kEQUAL, Float4>(const Float4 &a, const Float4 &b, float alpha, float beta) {
Float4 dst;
for (int i = 0; i < 4; ++i) {
dst.value[i] = (a.value[i] == b.value[i] ? 1.0f : 0.f);
}
return dst;
}
template <>
Float4 binary_op<ArmBinaryOpType::kGREATER, Float4>(const Float4 &a, const Float4 &b, float alpha, float beta) {
Float4 dst;
for (int i = 0; i < 4; ++i) {
dst.value[i] = a.value[i] > b.value[i] ? 1.0f : 0.f;
}
return dst;
}

Status ArmBinaryLayerAcc::Init(Context *context, LayerParam *param, LayerResource *resource,
const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs) {

desc_for_config_const_blob_ = outputs[0]->GetBlobDesc();
RETURN_ON_NEQ(ArmLayerAcc::Init(context, param, resource, inputs, outputs), TNN_OK);
if (outputs[0]->GetBlobDesc().data_type == DATA_TYPE_FLOAT) {

if (outputs[0]->GetBlobDesc().data_type == DATA_TYPE_FLOAT
|| outputs[0]->GetBlobDesc().data_type == DATA_TYPE_INT32) {
RETURN_ON_NEQ(allocateBufferParam(inputs, outputs), TNN_OK);
}
#if TNN_ARM82
@@ -221,7 +257,8 @@ Status ArmBinaryLayerAcc::Reshape(const std::vector<Blob *> &inputs, const std::

// SUPPORTED DATATYPES
bool ArmBinaryLayerAcc::DataTypeSupported(DataType data_type) {
if (data_type == DATA_TYPE_FLOAT || data_type == DATA_TYPE_HALF || data_type == DATA_TYPE_BFP16)
if (data_type == DATA_TYPE_FLOAT || data_type == DATA_TYPE_HALF || data_type == DATA_TYPE_BFP16
|| data_type == DATA_TYPE_INT32)
return true;
else
return false;
@@ -269,7 +306,7 @@ Status ArmBinaryLayerAcc::allocateBufferParam(const std::vector<Blob *> &inputs,
auto data_byte_size = DataTypeUtils::GetBytesSize(element_handle.GetDataType());
auto layer_data = element_handle.force_to<void *>();
if (element_handle.GetDataType() == DATA_TYPE_FLOAT) {
if (layer_res_size == 1) {
if (layer_res_size == 1 || dims_pad.size() == 0 ){
// broadcast single, just memcpy
RawBuffer temp(4 * layer_res_size * data_byte_size);
memcpy(temp.force_to<void *>(), layer_data, layer_res_size * data_byte_size);
@@ -290,6 +327,7 @@ Status ArmBinaryLayerAcc::allocateBufferParam(const std::vector<Blob *> &inputs,
hw_stride = DimsVectorUtils::Count(dims_pad, 2);
}
RawBuffer temp(count * data_byte_size);

DataFormatConverter::ConvertFromNCHWToNCHW4Float(
static_cast<float *>(layer_data), temp.force_to<float *>(), dims_pad[0], channel, hw_stride, 1);
broadcast_ = temp;
@@ -311,6 +349,7 @@ Status ArmBinaryLayerAcc::allocateBufferParam(const std::vector<Blob *> &inputs,
}
}


return TNN_OK;
}

@@ -331,7 +370,6 @@ Status ArmBinaryLayerAcc::Exec(const std::vector<Blob *> &inputs, const std::vec
auto output_ptr = GetBlobHandlePtr(output->GetHandle());
auto input0_ptr = input_ptrs_[0];
auto input1_ptr = input_ptrs_[1];

// input0_shape != output_shape && input1_shape != output_shape -> general impl
if (!DimsVectorUtils::Equal(output_dims, input_shapes_[0]) &&
!DimsVectorUtils::Equal(output_dims, input_shapes_[1])) {
@@ -361,7 +399,6 @@ Status ArmBinaryLayerAcc::Exec(const std::vector<Blob *> &inputs, const std::vec
BinaryFunc<T, op_type>(output_ptr, output_ptr, input_ptr, output_dims, input0_pad_shape, alpha_, beta_);
}
}

return TNN_OK;
}

@@ -412,7 +449,10 @@ Status ArmBinaryLayerAcc::DoForward(const std::vector<Blob *> &inputs, const std
return Exec<float, ArmBinaryOpType::kMIN>(inputs, outputs);
case ArmBinaryOpType::kHARDSWISH:
return Exec<float, ArmBinaryOpType::kHARDSWISH>(inputs, outputs);

case ArmBinaryOpType::kEQUAL:
return Exec<float, ArmBinaryOpType::kEQUAL>(inputs, outputs);
case ArmBinaryOpType::kGREATER:
return Exec<float, ArmBinaryOpType::kGREATER>(inputs, outputs);
default:
LOGE("Error, unknown binary op_type\n");
return TNNERR_LAYER_ERR;
@@ -439,12 +479,22 @@ Status ArmBinaryLayerAcc::DoForward(const std::vector<Blob *> &inputs, const std
return TNNERR_LAYER_ERR;
}
} else if (data_type == DATA_TYPE_INT8) {
if (op_type_ == ArmBinaryOpType::kADD) {
return ExecInt8(inputs, outputs);
} else {
LOGE("Error, int8 binary op only support add\n");
return TNNERR_LAYER_ERR;
switch (op_type_) {
case ArmBinaryOpType::kADD:
return ExecInt8(inputs, outputs);
case ArmBinaryOpType::kEQUAL:
outputs[0]->GetBlobDesc().data_type = DATA_TYPE_FLOAT;
Exec<float, ArmBinaryOpType::kEQUAL>(inputs, outputs);
break;
case ArmBinaryOpType::kGREATER:
outputs[0]->GetBlobDesc().data_type = DATA_TYPE_FLOAT;
Exec<float, ArmBinaryOpType::kGREATER>(inputs, outputs);
break;
default:
LOGE("Error, int8 binary op only support add\n");
return TNNERR_LAYER_ERR;
}
return TNN_OK;
}
#if TNN_ARM82
else if (data_type == DATA_TYPE_HALF) {
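The new kEQUAL / kGREATER specializations materialize each comparison as a float mask of 1.0f / 0.f per lane, and the INT8 branch of DoForward reuses the same float kernels by retyping the output blob to DATA_TYPE_FLOAT before dispatching. A minimal standalone sketch of that lane-wise convention, using a plain std::array in place of TNN's Float4 (Vec4, equal4 and greater4 are illustrative names, not TNN API):

#include <array>
#include <cstdio>

using Vec4 = std::array<float, 4>;  // stand-in for TNN's Float4

// lane-wise EQUAL, same 1.0f / 0.f convention as the new specialization
Vec4 equal4(const Vec4 &a, const Vec4 &b) {
    Vec4 dst{};
    for (int i = 0; i < 4; ++i) {
        dst[i] = (a[i] == b[i]) ? 1.0f : 0.f;
    }
    return dst;
}

// lane-wise GREATER
Vec4 greater4(const Vec4 &a, const Vec4 &b) {
    Vec4 dst{};
    for (int i = 0; i < 4; ++i) {
        dst[i] = (a[i] > b[i]) ? 1.0f : 0.f;
    }
    return dst;
}

int main() {
    Vec4 a{1.f, 2.f, 3.f, 4.f};
    Vec4 b{1.f, 5.f, 2.f, 4.f};
    Vec4 eq = equal4(a, b);    // 1 0 0 1
    Vec4 gt = greater4(a, b);  // 0 0 1 0
    printf("equal:   %.0f %.0f %.0f %.0f\n", eq[0], eq[1], eq[2], eq[3]);
    printf("greater: %.0f %.0f %.0f %.0f\n", gt[0], gt[1], gt[2], gt[3]);
    return 0;
}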
5 changes: 5 additions & 0 deletions source/tnn/device/arm/acc/arm_binary_layer_acc.h
@@ -27,6 +27,8 @@ enum class ArmBinaryOpType : int {
kMAX = 4,
kMIN = 5,
kHARDSWISH = 6,
kEQUAL = 7,
kGREATER = 8,
};

class ArmBinaryLayerAcc : public ArmLayerAcc {
@@ -41,6 +43,9 @@ class ArmBinaryLayerAcc : public ArmLayerAcc {
template <typename T, ArmBinaryOpType op_type>
Status Exec(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs);

template <typename T_IN, typename T_OUT>
Status TransDataType(void *data, const DimsVector &shapes);

// int8 will be implemented inside op
virtual Status ExecInt8(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs);

20 changes: 19 additions & 1 deletion source/tnn/device/arm/acc/arm_cast_layer_acc.cc
@@ -41,18 +41,21 @@ Status ArmCastLayerAcc::DoForward(const std::vector<Blob *> &inputs, const std::
if (output_dims.size() > 1) {
channel = output_dims[1];
}

count = count / channel;
count = count * ROUND_UP(channel, 4);
}

if (input_data_type == output_data_type) {
if (output_data_type == DATA_TYPE_FLOAT ||
output_data_type == DATA_TYPE_BFP16 ||
output_data_type == DATA_TYPE_INT32) {
output_data_type == DATA_TYPE_INT32 ||
output_data_type == DATA_TYPE_INT8) {
if (output_data != input_data) {
memcpy(output_data, input_data, count * ele_size);
}
} else {
LOGE("Unsupported data type in cast %d\n", (int)output_data_type);
return Status(TNNERR_LAYER_ERR, "Unsupported data type in cast");
}
} else {
@@ -70,7 +73,22 @@ Status ArmCastLayerAcc::DoForward(const std::vector<Blob *> &inputs, const std::
for(int i = 0; i < count; ++i) {
output_data_ptr[i] = static_cast<int>(input_data_ptr[i]);
}
} else if (input_data_type == DATA_TYPE_FLOAT &&
output_data_type == DATA_TYPE_INT8) {
auto *input_data_ptr = (float *)input_data;
auto *output_data_ptr = (int8_t *)output_data;
for(int i = 0; i < count; ++i) {
output_data_ptr[i] = static_cast<int8_t>(input_data_ptr[i]);
}
} else if (input_data_type == DATA_TYPE_INT32 &&
output_data_type == DATA_TYPE_INT8) {
auto *input_data_ptr = (int *)input_data;
auto *output_data_ptr = (int8_t *)output_data;
for(int i = 0; i < count; ++i) {
output_data_ptr[i] = static_cast<int8_t>(input_data_ptr[i]);
}
} else {
LOGE("Unsupported data type in cast input=%d output=%d\n", (int)input_data_type, (int)output_data_type);
return Status(TNNERR_LAYER_ERR, "Unsupported data type in cast");
}
}
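The added INT8 cast branches are plain per-element static_casts: float values are truncated toward zero and nothing is clamped or rounded. A small standalone sketch of that narrowing behavior (not TNN code; values are assumed to fit in int8 range, which the diff also assumes):

#include <cstdint>
#include <cstdio>

int main() {
    // float -> int8: truncation toward zero, mirroring the new cast loop
    float in_f[4] = {1.9f, -2.7f, 0.5f, 100.f};
    int8_t out_f[4];
    for (int i = 0; i < 4; ++i) {
        out_f[i] = static_cast<int8_t>(in_f[i]);
    }
    printf("%d %d %d %d\n", out_f[0], out_f[1], out_f[2], out_f[3]);  // 1 -2 0 100

    // int32 -> int8: same element-wise narrowing
    int in_i[4] = {127, -5, 0, 42};
    int8_t out_i[4];
    for (int i = 0; i < 4; ++i) {
        out_i[i] = static_cast<int8_t>(in_i[i]);
    }
    printf("%d %d %d %d\n", out_i[0], out_i[1], out_i[2], out_i[3]);  // 127 -5 0 42
    return 0;
}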
17 changes: 13 additions & 4 deletions source/tnn/device/arm/acc/arm_concat_layer_acc.cc
@@ -18,6 +18,7 @@
#include "tnn/utils/dims_utils.h"
#include "tnn/utils/naive_compute.h"
#include "tnn/utils/data_type_utils.h"
#include <math.h>

namespace TNN_NS {

@@ -216,7 +217,12 @@ static int concat_common_i8(Blob *output, const std::vector<Blob *> &inputs, int
}

static DimsVector GetCXRoundDims(const DimsVector &dims, const int round) {
DimsVector round_dims = {dims[0], UP_DIV(dims[1], round)};
DimsVector round_dims = {dims[0]};
if (dims.size() < 2) {
round_dims.push_back(0);
} else {
round_dims.push_back(UP_DIV(dims[1], round));
}
for (int i = 2; i < dims.size(); ++i) {
round_dims.push_back(dims[i]);
}
@@ -242,6 +248,8 @@ static int concat_common(Blob *output, const std::vector<Blob *> &inputs, int ax
auto input_dims = input->GetBlobDesc().dims;
DimsVector round_input_dims = GetCXRoundDims(input_dims, 4);
auto input_stride = DimsVectorUtils::Count(round_input_dims, axis);
// during concat, if the original data has fewer than 4 elements, take at least 1 element per step
input_stride = std::max(1, input_stride);
auto input_ptr = reinterpret_cast<T *>(GetBlobHandlePtr(input->GetHandle())) + n * input_stride;
memcpy(output_ptr, input_ptr, input_stride * sizeof(T));
output_ptr += input_stride;
@@ -492,14 +500,15 @@ Status ArmConcatLayerAcc::DoForward(const std::vector<Blob *> &inputs, const std
}

if (inputs[0]->GetBlobDesc().data_format == DATA_FORMAT_NCHW) {
return ExecNchw(inputs, outputs);
ExecNchw(inputs, outputs);
} else if (inputs[0]->GetBlobDesc().data_format == DATA_FORMAT_NC4HW4 ||
inputs[0]->GetBlobDesc().data_format == DATA_FORMAT_NC8HW8) {
return Exec(inputs, outputs);
Exec(inputs, outputs);
} else {
return Status(TNNERR_LAYER_ERR, "Unsupported data format in concat");
}
return TNNERR_LAYER_ERR;

return TNN_OK;
}

REGISTER_ARM_ACC(Concat, LAYER_CONCAT)
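The two concat changes guard against shapes with fewer than two dimensions: GetCXRoundDims no longer indexes dims[1] for 1-D shapes, and input_stride is clamped to at least 1 so the memcpy still advances. A standalone sketch of that behavior under those assumptions (UP_DIV here is a local stand-in for TNN's macro, and the stride computation imitates DimsVectorUtils::Count):

#include <algorithm>
#include <cstdio>
#include <vector>

static int UP_DIV(int x, int y) { return (x + y - 1) / y; }  // local stand-in for TNN's macro

// mirrors the patched GetCXRoundDims: 1-D shapes get a 0 channel entry
// instead of reading dims[1] out of bounds
std::vector<int> GetCXRoundDims(const std::vector<int> &dims, int round) {
    std::vector<int> round_dims = {dims[0]};
    if (dims.size() < 2) {
        round_dims.push_back(0);
    } else {
        round_dims.push_back(UP_DIV(dims[1], round));
    }
    for (size_t i = 2; i < dims.size(); ++i) {
        round_dims.push_back(dims[i]);
    }
    return round_dims;
}

int main() {
    auto a = GetCXRoundDims({8}, 4);        // {8, 0}
    auto b = GetCXRoundDims({1, 6, 5}, 4);  // {1, 2, 5}

    // the trailing-dim product of {8, 0} is 0, so the new std::max(1, input_stride)
    // clamp keeps the concat memcpy copying at least 1 element per step
    int input_stride = 1;
    for (size_t i = 1; i < a.size(); ++i) input_stride *= a[i];
    input_stride = std::max(1, input_stride);

    printf("a = {%d, %d}, b = {%d, %d, %d}, stride = %d\n",
           a[0], a[1], b[0], b[1], b[2], input_stride);
    return 0;
}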
3 changes: 3 additions & 0 deletions source/tnn/device/arm/acc/arm_concat_layer_acc.h
@@ -24,6 +24,9 @@ class ArmConcatLayerAcc : public ArmLayerAcc {
public:
virtual ~ArmConcatLayerAcc();

template <typename T_IN, typename T_OUT>
Status TransDataType(void *data, const DimsVector &shapes, bool padding = false);

Status ExecInt8(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs);
Status ExecNchw(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs);
Status Exec(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs);
39 changes: 39 additions & 0 deletions source/tnn/device/arm/acc/arm_equal_layer_acc.cc
@@ -0,0 +1,39 @@
// Tencent is pleased to support the open source community by making TNN available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "tnn/device/arm/acc/arm_binary_layer_acc.h"

namespace TNN_NS {

DECLARE_ARM_BINARY_ACC(Equal);

Status ArmEqualLayerAcc::Init(Context *context, LayerParam *param, LayerResource *resource,
const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs) {
Status status = ArmBinaryLayerAcc::Init(context, param, resource, inputs, outputs);
if (status != TNN_OK) {
return status;
}

op_type_ = ArmBinaryOpType::kEQUAL;

return TNN_OK;
}

ArmEqualLayerAcc::~ArmEqualLayerAcc() {}

REGISTER_ARM_ACC(Equal, LAYER_EQUAL)
REGISTER_ARM_PRECISION_FP16(LAYER_EQUAL)
REGISTER_ARM_LAYOUT(LAYER_EQUAL, DATA_FORMAT_NC4HW4)

} // namespace TNN_NS
4 changes: 2 additions & 2 deletions source/tnn/device/arm/acc/arm_gather_layer_acc.cc
@@ -59,7 +59,7 @@ Status ArmGatherLayerAcc::DoForward(const std::vector<Blob *> &inputs, const std

const int ele_size = DataTypeUtils::GetBytesSize(outputs[0]->GetBlobDesc().data_type);
auto output_data_ptr = GetBlobHandlePtr(outputs[0]->GetHandle());

for (int b = 0; b < batch; b++) {
int input_index_b = b * input_slice_count * slice_size;
int output_index_b = b * output_slice_count * slice_size;
@@ -69,7 +69,7 @@ Status ArmGatherLayerAcc::DoForward(const std::vector<Blob *> &inputs, const std
slice_index += input_slice_count;
}
if (slice_index < 0 || slice_index >= input_slice_count) {
LOGE("ArmGatherLayerAcc::Forward invalid slice_index\n");
LOGE("ArmGatherLayerAcc::Forward invalid slice_index %d %d\n", slice_index, (int)indices_data_ptr[i]);
return Status(TNNERR_MODEL_ERR, "ArmGatherLayerAcc::Forward invalid slice_index");
}
int input_index = input_index_b + slice_index * slice_size;
39 changes: 39 additions & 0 deletions source/tnn/device/arm/acc/arm_greater_layer_acc.cc
@@ -0,0 +1,39 @@
// Tencent is pleased to support the open source community by making TNN available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "tnn/device/arm/acc/arm_binary_layer_acc.h"

namespace TNN_NS {

DECLARE_ARM_BINARY_ACC(Greater);

Status ArmGreaterLayerAcc::Init(Context *context, LayerParam *param, LayerResource *resource,
const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs) {
Status status = ArmBinaryLayerAcc::Init(context, param, resource, inputs, outputs);
if (status != TNN_OK) {
return status;
}

op_type_ = ArmBinaryOpType::kGREATER;

return TNN_OK;
}

ArmGreaterLayerAcc::~ArmGreaterLayerAcc() {}

REGISTER_ARM_ACC(Greater, LAYER_GREATER)
REGISTER_ARM_PRECISION_FP16(LAYER_GREATER)
REGISTER_ARM_LAYOUT(LAYER_GREATER, DATA_FORMAT_NC4HW4)

} // namespace TNN_NS