diff --git a/source/tnn/core/const_folder.cc b/source/tnn/core/const_folder.cc
index 4df4451e4..89985561b 100644
--- a/source/tnn/core/const_folder.cc
+++ b/source/tnn/core/const_folder.cc
@@ -161,7 +161,7 @@ Status ConstFolder::Forward() {
         std::stringstream ss;
         ss << "<" << blob->GetBlobDesc().name << "> shape:[";
         for(int i: blob->GetBlobDesc().dims) {ss << i << ","; }
         ss << "]";
-        LOGD("ConstFolder save const with name: %s\n", ss.str().c_str());
+        LOGD("ConstFolder save const with name: %s, dtype=%d\n", ss.str().c_str(), blob->GetBlobDesc().data_type);
     }
 #endif
diff --git a/source/tnn/core/default_network.cc b/source/tnn/core/default_network.cc
index 6294f5be4..d48bdf39a 100644
--- a/source/tnn/core/default_network.cc
+++ b/source/tnn/core/default_network.cc
@@ -234,6 +234,7 @@ Status DefaultNetwork::InitLayers(NetStructure *net_structure, NetResource *net_
             cur_layer->InferShapeAhead(inputs, outputs_for_shape, layer_info->param.get(),
                                        net_resource->resource_map[layer_name].get());
+            LOGD("InferShapeAhead Output Shape: [%s]\n", cur_layer->GetOutputBlobs()[0]->GetBlobDesc().description().c_str());
             delete cur_layer;
         }
diff --git a/source/tnn/device/arm/acc/arm_expand_layer_acc.cc b/source/tnn/device/arm/acc/arm_expand_layer_acc.cc
index d1012ab1e..2bc608367 100644
--- a/source/tnn/device/arm/acc/arm_expand_layer_acc.cc
+++ b/source/tnn/device/arm/acc/arm_expand_layer_acc.cc
@@ -21,6 +21,10 @@ namespace TNN_NS {
 
 ArmExpandLayerAcc::~ArmExpandLayerAcc() {}
 
+bool ArmExpandLayerAcc::UseNaiveConstantBlobs() {
+    return true;
+}
+
 Status ArmExpandLayerAcc::InferRuntimeOutputShape(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs) {
     auto expand_param = dynamic_cast<ExpandLayerParam *>(param_);
     CHECK_PARAM_NULL(expand_param);
diff --git a/source/tnn/device/arm/acc/arm_expand_layer_acc.h b/source/tnn/device/arm/acc/arm_expand_layer_acc.h
index 81d1ecf12..75f8344b2 100644
--- a/source/tnn/device/arm/acc/arm_expand_layer_acc.h
+++ b/source/tnn/device/arm/acc/arm_expand_layer_acc.h
@@ -27,6 +27,9 @@ class ArmExpandLayerAcc : public ArmLayerAcc {
     virtual Status InferRuntimeOutputShape(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs) override;
 
     virtual Status DoForward(const std::vector<Blob *> &inputs, const std::vector<Blob *> &outputs) override;
+
+protected:
+    virtual bool UseNaiveConstantBlobs() override;
 };
 
 }  // namespace TNN_NS
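The ARM Expand acc above (and ScatterND and StrideSliceV2 further down) opts into the naive constant-blob path by overriding UseNaiveConstantBlobs() to return true. A rough conceptual sketch of how such a flag is typically consumed when constants are reloaded; every name below except UseNaiveConstantBlobs is hypothetical, and this is not TNN's actual base-class code:

    #include <cstdio>

    // Hypothetical base class: the flag steers constant-blob reloading between a
    // generic (plain float, layout-agnostic) path and a device-specific packed path.
    class LayerAccSketch {
    public:
        virtual ~LayerAccSketch() = default;

        void ReloadConstantBlobs() {
            if (UseNaiveConstantBlobs()) {
                std::puts("reload constants via the naive, layout-agnostic path");
            } else {
                std::puts("reload constants into the acc's packed layout (e.g. NC4HW4)");
            }
        }

    protected:
        virtual bool UseNaiveConstantBlobs() { return false; }  // packed path by default
    };

    // Mirrors the override added for the ARM Expand acc.
    class ExpandAccSketch : public LayerAccSketch {
    protected:
        virtual bool UseNaiveConstantBlobs() override { return true; }
    };

Keeping the decision in a virtual hook lets accs that never repack their constant inputs fall back to the generic handling without touching the shared reload logic.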
support"); } @@ -107,6 +107,9 @@ Status ArmPadV2LayerAcc::DoForward(const std::vector &inputs, const std: pad_context.pad_r = pads[9]; // pad width end break; } + case 12: { + pad_context.pads = pads; + } } pad_context.type = layer_param->type; diff --git a/source/tnn/device/arm/acc/arm_scatter_nd_layer_acc.cc b/source/tnn/device/arm/acc/arm_scatter_nd_layer_acc.cc index 3c6025dab..3a737ebd2 100644 --- a/source/tnn/device/arm/acc/arm_scatter_nd_layer_acc.cc +++ b/source/tnn/device/arm/acc/arm_scatter_nd_layer_acc.cc @@ -19,7 +19,11 @@ namespace TNN_NS { -DECLARE_ARM_ACC(ScatterND, LAYER_SCATTER_ND); +DECLARE_ARM_ACC_WITH_EXTRA(ScatterND, LAYER_SCATTER_ND, + virtual bool UseNaiveConstantBlobs() { + return true; + } +); Status ArmScatterNDLayerAcc::DoForward(const std::vector& inputs, const std::vector& outputs) { auto resource = dynamic_cast(resource_); diff --git a/source/tnn/device/arm/acc/arm_stride_slice_layer_acc.cc b/source/tnn/device/arm/acc/arm_stride_slice_layer_acc.cc index f75c1a36a..157eb1ebd 100644 --- a/source/tnn/device/arm/acc/arm_stride_slice_layer_acc.cc +++ b/source/tnn/device/arm/acc/arm_stride_slice_layer_acc.cc @@ -161,7 +161,12 @@ Status ArmStrideSliceLayerAcc::DoForward(const std::vector &inputs, cons REGISTER_ARM_ACC(StrideSlice, LAYER_STRIDED_SLICE) REGISTER_ARM_LAYOUT(LAYER_STRIDED_SLICE, DATA_FORMAT_NC4HW4) -DECLARE_ARM_ACC(StrideSliceV2, LAYER_STRIDED_SLICE_V2); + +DECLARE_ARM_ACC_WITH_EXTRA(StrideSliceV2, LAYER_STRIDED_SLICE_V2, + virtual bool UseNaiveConstantBlobs() { + return true; + } +); static Status FastSliceForHW(const std::vector &inputs, const std::vector &outputs, StrideSliceV2LayerParam *param) { diff --git a/source/tnn/device/arm/acc/compute/pad_function.cc b/source/tnn/device/arm/acc/compute/pad_function.cc index 686b4b084..4e374ecc6 100644 --- a/source/tnn/device/arm/acc/compute/pad_function.cc +++ b/source/tnn/device/arm/acc/compute/pad_function.cc @@ -50,6 +50,92 @@ static void CommonPadImpl(float *input_data, float *output_data, int batch_c_r4, } } +// Common pad in height and width directions +static Status CommonPadDimSixImpl(float *input_data, float *output_data, int batch_c_r4, DimsVector input_dims, DimsVector output_dims, DimsVector pads, Float4 &vvalue) { + if (input_dims[2] != output_dims[2] || input_dims[4] != output_dims[4]) { + LOGE("CommonPadDimSixImpl(const type) only support 3 & 5 dims\n"); + return Status(TNNERR_UNKNOWN_LAYER, "CommonPadDimSixImpl(const type) only support 3 & 5 dims"); + } + + int height = input_dims[2]; + int width = input_dims[4]; + + int in_h = input_dims[3]; + int in_w = input_dims[5]; + int out_h = output_dims[3]; + int out_w = output_dims[5]; + + int pad_t = pads[3]; + int pad_b = pads[9]; + int pad_l = pads[5]; + int pad_r = pads[11]; + + /* + printf("input_dims: "); + for (auto dim : input_dims) { + printf("%d, ", dim); + } + printf("\n"); + + printf("output_dims:"); + for (auto dim : output_dims) { + printf("%d, ", dim); + } + printf("\n"); + + printf("pads: "); + for (auto dim : pads) { + printf("%d, ", dim); + } + printf("\n"); + + input_dims: 1, 512, 32, 1, 32, 1, + output_dims:1, 512, 32, 2, 32, 2, + pads: 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, + */ + + for (int c = 0; c < batch_c_r4; c += 4) { + auto input_ptr_c = input_data + c * height * in_h * width * in_w; + auto output_ptr_c = output_data + c * height * out_h * width * out_w; + + for (int hi = 0; hi < height; ++hi) { + if (pad_t) + for (int i = 0; i < width * out_w * pad_t; ++i) + Float4::save(output_ptr_c + i * 4, vvalue); + + for (int h = 0; h < 
diff --git a/source/tnn/device/arm/acc/compute/pad_function.cc b/source/tnn/device/arm/acc/compute/pad_function.cc
index 686b4b084..4e374ecc6 100644
--- a/source/tnn/device/arm/acc/compute/pad_function.cc
+++ b/source/tnn/device/arm/acc/compute/pad_function.cc
@@ -50,6 +50,92 @@ static void CommonPadImpl(float *input_data, float *output_data, int batch_c_r4,
     }
 }
 
+// Common pad in height and width directions
+static Status CommonPadDimSixImpl(float *input_data, float *output_data, int batch_c_r4, DimsVector input_dims,
+                                  DimsVector output_dims, DimsVector pads, Float4 &vvalue) {
+    if (input_dims[2] != output_dims[2] || input_dims[4] != output_dims[4]) {
+        LOGE("CommonPadDimSixImpl(const type) only support 3 & 5 dims\n");
+        return Status(TNNERR_UNKNOWN_LAYER, "CommonPadDimSixImpl(const type) only support 3 & 5 dims");
+    }
+
+    int height = input_dims[2];
+    int width = input_dims[4];
+
+    int in_h = input_dims[3];
+    int in_w = input_dims[5];
+    int out_h = output_dims[3];
+    int out_w = output_dims[5];
+
+    int pad_t = pads[3];
+    int pad_b = pads[9];
+    int pad_l = pads[5];
+    int pad_r = pads[11];
+
+    /*
+    printf("input_dims: ");
+    for (auto dim : input_dims) {
+        printf("%d, ", dim);
+    }
+    printf("\n");
+
+    printf("output_dims:");
+    for (auto dim : output_dims) {
+        printf("%d, ", dim);
+    }
+    printf("\n");
+
+    printf("pads: ");
+    for (auto dim : pads) {
+        printf("%d, ", dim);
+    }
+    printf("\n");
+
+    input_dims: 1, 512, 32, 1, 32, 1,
+    output_dims:1, 512, 32, 2, 32, 2,
+    pads: 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
+    */
+
+    for (int c = 0; c < batch_c_r4; c += 4) {
+        auto input_ptr_c = input_data + c * height * in_h * width * in_w;
+        auto output_ptr_c = output_data + c * height * out_h * width * out_w;
+
+        for (int hi = 0; hi < height; ++hi) {
+            if (pad_t)
+                for (int i = 0; i < width * out_w * pad_t; ++i)
+                    Float4::save(output_ptr_c + i * 4, vvalue);
+
+            for (int h = 0; h < in_h; ++h) {
+                auto output_ptr_h = output_ptr_c + width * out_w * (h + pad_t) * 4;
+                auto input_ptr_h = input_ptr_c + width * in_w * h * 4;
+
+                for (int w = 0; w < width; ++w) {
+                    auto output_ptr_w = output_ptr_h + w * out_w * 4;
+                    auto input_ptr_w = input_ptr_h + w * in_w * 4;
+
+                    for (int i = 0; i < pad_l; i++)
+                        Float4::save(output_ptr_w + i * 4, vvalue);
+
+                    memcpy(output_ptr_w + pad_l * 4, input_ptr_w, in_w * sizeof(float) * 4);
+
+                    for (int i = in_w + pad_l; i < out_w; i++)
+                        Float4::save(output_ptr_w + i * 4, vvalue);
+                }
+            }
+
+            if (pad_b) {
+                auto output_ptr_h = output_ptr_c + width * out_w * (in_h + pad_t) * 4;
+                for (int i = 0; i < width * out_w * pad_b; ++i)
+                    Float4::save(output_ptr_h + i * 4, vvalue);
+            }
+
+            input_ptr_c += in_h * width * in_w * 4;
+            output_ptr_c += out_h * width * out_w * 4;
+        }
+
+    }
+
+    return TNN_OK;
+}
+
 static void CalculatePad(Float4 &src, const Float4 &vvalue, const int padded_zero) {
     if (padded_zero)
         src = Float4::pad(src, vvalue, padded_zero);
@@ -218,9 +304,9 @@ static void ChannelPadImpl(float *input_data, float *output_data, int batch, int
 Status PadUtils::ConstPadV2(float *input_data, float *output_data, DimsVector input_dims, DimsVector output_dims,
                             PadContext context) {
-    if (input_dims.size() < 2 || input_dims.size() > 5) {
-        LOGE("Arm PadV2(const type) only support 2 - 5 dims\n");
-        return Status(TNNERR_UNKNOWN_LAYER, "Arm PadV2 only support 2 - 5 dims");
+    if (input_dims.size() < 2 || input_dims.size() > 6) {
+        LOGE("Arm PadV2(const type) only support 2 - 6 dims\n");
+        return Status(TNNERR_UNKNOWN_LAYER, "Arm PadV2 only support 2 - 6 dims");
     }
     const int batch = context.output_batch;
     const int oc_r4 = context.output_channel_r4;
@@ -237,7 +323,16 @@ Status PadUtils::ConstPadV2(float *input_data, float *output_data, DimsVector in
     const int pad_b = context.pad_b;
     const int pad_r = context.pad_r;
     Float4 value_v = Float4(context.value);
-
+
+    if (input_dims.size() == 6) {
+        if (input_dims[0] == output_dims[0] && context.pads[1] == context.pads[7]) {
+            return CommonPadDimSixImpl(input_data, output_data, batch * oc_r4, input_dims, output_dims, context.pads, value_v);
+        } else {
+            LOGE("Arm PadV2(const type) with 6 dims do not support channel dim\n");
+            return Status(TNNERR_UNKNOWN_LAYER, "Arm PadV2(const type) with 6 dims do not support channel dim");
+        }
+    }
+
     //ncdhw, extend dim except the batch n
     if (context.input_batch == context.output_batch) {
         if (pad_c_b == 0 && pad_c_e == 0) {
diff --git a/source/tnn/device/arm/acc/compute/pad_function.h b/source/tnn/device/arm/acc/compute/pad_function.h
index c76e74041..3f56a3e75 100644
--- a/source/tnn/device/arm/acc/compute/pad_function.h
+++ b/source/tnn/device/arm/acc/compute/pad_function.h
@@ -47,6 +47,7 @@ class PadUtils {
         int32_t pad_b = 0;
         int32_t pad_l = 0;
         int32_t pad_r = 0;
+        DimsVector pads;
         int32_t type = 0;
         float value = 0.0f;
     };
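CommonPadDimSixImpl above walks the data in groups of four floats (Float4) and only lets the pad grow dims 3 and 5; batch, channel, dim 2 and dim 4 must keep their sizes, which is what the input_dims[2]/input_dims[4] check and the ConstPadV2 guard enforce. As a readability aid, here is a plain scalar model of the same operation on a row-major 6-D tensor, assuming every pad except pads[3]/pads[9] and pads[5]/pads[11] is zero; this is a conceptual reference, not the TNN kernel:

    #include <cstddef>
    #include <vector>

    // Scalar mental model of the 6-D constant pad: fill the output with the pad
    // value, then copy the input block into its shifted position. Only dims 3
    // and 5 are offset; the real ARM kernel additionally works on 4-channel
    // packed Float4 groups.
    static void ConstPad6DRef(const std::vector<float> &in, std::vector<float> &out,
                              const std::vector<int> &in_dims,  // 6 entries
                              const std::vector<int> &pads,     // 12 entries: begins, then ends
                              float value) {
        std::vector<int> out_dims(6);
        for (int i = 0; i < 6; ++i) {
            out_dims[i] = in_dims[i] + pads[i] + pads[i + 6];
        }
        size_t out_count = 1;
        for (int d : out_dims) {
            out_count *= static_cast<size_t>(d);
        }
        out.assign(out_count, value);  // everything starts as pad value

        auto index = [](const std::vector<int> &dims, int d0, int d1, int d2, int d3, int d4, int d5) {
            return ((((static_cast<size_t>(d0) * dims[1] + d1) * dims[2] + d2) * dims[3] + d3) * dims[4] + d4) *
                       dims[5] + d5;
        };

        for (int d0 = 0; d0 < in_dims[0]; ++d0)
            for (int d1 = 0; d1 < in_dims[1]; ++d1)
                for (int d2 = 0; d2 < in_dims[2]; ++d2)
                    for (int d3 = 0; d3 < in_dims[3]; ++d3)
                        for (int d4 = 0; d4 < in_dims[4]; ++d4)
                            for (int d5 = 0; d5 < in_dims[5]; ++d5)
                                out[index(out_dims, d0, d1, d2, d3 + pads[3], d4, d5 + pads[5])] =
                                    in[index(in_dims, d0, d1, d2, d3, d4, d5)];
    }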
diff --git a/source/tnn/device/cpu/acc/cpu_layer_acc.cc b/source/tnn/device/cpu/acc/cpu_layer_acc.cc
index 1ad4399a7..d77564d07 100644
--- a/source/tnn/device/cpu/acc/cpu_layer_acc.cc
+++ b/source/tnn/device/cpu/acc/cpu_layer_acc.cc
@@ -55,13 +55,22 @@ Status CpuLayerAcc::ReloadConstantBlobs(const std::vector &inputs, bool
         if (const_blob_map.find(name) != const_blob_map.end()) {
             blob = const_blob_map[name];
         }
-        auto status = RawBuffer2Blob(buffer.get(), blob);
+
+        auto buffer_cvt = RawBuffer();
+        if (buffer->GetDataType() == DATA_TYPE_HALF) {
+            buffer_cvt = ConvertHalfHandle(*(buffer.get()));
+        } else {
+            buffer_cvt = *(buffer.get());
+        }
+
+        auto status = RawBuffer2Blob(&buffer_cvt, blob);
         RETURN_ON_NEQ(status, TNN_OK);
 
         blob->SetFlag(DATA_FLAG_CHANGE_NEVER);
         const_blob_map[name] = blob;
         iter->SetHandle(blob->GetHandle());
-        LOGD("Reload constant blob: %s %p\n", name.c_str(), &blob);
+        iter->SetBlobDesc(blob->GetBlobDesc());
+        LOGD("Reload constant blob: %s %p, dtype=%d\n", name.c_str(), &blob, blob->GetBlobDesc().data_type);
     }
     const_blob_map_ = const_blob_map;
     return TNN_OK;
diff --git a/source/tnn/layer/conv_layer.cc b/source/tnn/layer/conv_layer.cc
index f5cd2ade5..1d304ab41 100644
--- a/source/tnn/layer/conv_layer.cc
+++ b/source/tnn/layer/conv_layer.cc
@@ -32,9 +32,14 @@ Status ConvLayer::InferOutputShape(bool ignore_error) {
     ConvLayerParam* conv_param = dynamic_cast<ConvLayerParam*>(param_);
     CHECK_PARAM_NULL(conv_param);
 
-    int num    = input_blob->GetBlobDesc().dims[0];
-    int height = input_blob->GetBlobDesc().dims[2];
-    int width  = input_blob->GetBlobDesc().dims[3];
+    if (input_blob->GetBlobDesc().dims.size() == 0) {
+        LOGE_IF(!ignore_error, "Error: dims not supported\n");
+        return Status(TNNERR_PARAM_ERR, "Error: dims not supported");
+    }
+
+    int num    = DimsVectorUtils::Count(input_blob->GetBlobDesc().dims, 0, 1);
+    int height = DimsVectorUtils::Count(input_blob->GetBlobDesc().dims, 2, 3);
+    int width  = DimsVectorUtils::Count(input_blob->GetBlobDesc().dims, 3, 4);
 
     const int pad_w_begin = conv_param->pads[0];
     const int pad_h_begin = conv_param->pads[2];
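ConvLayer::InferOutputShape now reads num/height/width through DimsVectorUtils::Count instead of indexing dims[0]/dims[2]/dims[3] directly, presumably so that inputs whose dims vector is shorter than four entries (only the empty case is rejected above) no longer read past its end. Assuming Count(dims, start, end) returns the product of dims over [start, end) with end clamped to the vector size, and 1 for an empty range, a stand-in for that behavior looks like this (CountRange is hypothetical, not the TNN implementation):

    #include <vector>

    // Product of dims[start, end), with end clamped to dims.size(); an empty
    // range yields 1, so e.g. CountRange({1, 8, 16}, 3, 4) == 1 instead of
    // reading out of bounds.
    static int CountRange(const std::vector<int> &dims, int start, int end) {
        if (end < 0 || end > static_cast<int>(dims.size())) {
            end = static_cast<int>(dims.size());
        }
        int result = 1;
        for (int i = start; i < end; ++i) {
            result *= dims[i];
        }
        return result;
    }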
diff --git a/source/tnn/layer/pad_layer.cc b/source/tnn/layer/pad_layer.cc
index 76dae2e06..5d413ca91 100644
--- a/source/tnn/layer/pad_layer.cc
+++ b/source/tnn/layer/pad_layer.cc
@@ -36,6 +36,11 @@ Status PadLayer::InferOutputShape(bool ignore_error) {
     Blob* input_blob = input_blobs_[0];
     Blob* output_blob = output_blobs_[0];
     auto dims = input_blob->GetBlobDesc().dims;
+    if (dims.size() < 4) {
+        LOGE_IF(!ignore_error, "Error: dims not supported\n");
+        return Status(TNNERR_PARAM_ERR, "Error: dims not supported");
+    }
+
     dims[3] += layer_param->pads[0] + layer_param->pads[1];
     dims[2] += layer_param->pads[2] + layer_param->pads[3];
     dims[1] += layer_param->pads[4] + layer_param->pads[5];
diff --git a/source/tnn/optimizer/graph_matcher/ir.cc b/source/tnn/optimizer/graph_matcher/ir.cc
index 264b760f4..8703f6164 100644
--- a/source/tnn/optimizer/graph_matcher/ir.cc
+++ b/source/tnn/optimizer/graph_matcher/ir.cc
@@ -581,7 +581,7 @@ namespace TNN_NS {
             RETURN_ON_FAIL(IsConnectedGraph(this, connected));
         }
         if (!connected) {
-            ERRORV("the graph is not connected.", msg);
+            DEBUGV("the graph is not connected.", msg);
             return Status(TNNERR_COMMON_ERROR, msg);
         }
         return TNN_OK;
diff --git a/source/tnn/optimizer/graph_matcher/logger.h b/source/tnn/optimizer/graph_matcher/logger.h
index be2df52b1..f414c8f60 100644
--- a/source/tnn/optimizer/graph_matcher/logger.h
+++ b/source/tnn/optimizer/graph_matcher/logger.h
@@ -144,6 +144,12 @@ class Logger {
         ::TNN_NS::Logger::instance().log(std::string(__ss), ::TNN_NS::Logger::kLogLevelDebug());\
     } while(0)
 
+#define DEBUGV(f_, vname_, ...) \
+    __LOG_FORMAT(vname_, 2000, f_, ##__VA_ARGS__); \
+    do { \
+        ::TNN_NS::Logger::instance().log(std::string(vname_), ::TNN_NS::Logger::kLogLevelDebug());\
+    } while(0)
+
 #define INFO(f_, ...) \
     do { \
         __LOG_FORMAT(__ss, 2000, f_, ##__VA_ARGS__); \
diff --git a/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc b/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc
index ab3c14307..ba896cecb 100644
--- a/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc
+++ b/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc
@@ -74,7 +74,7 @@ namespace optimizer {
         std::shared_ptr<Graph> graph = std::make_shared<Graph>();
         auto status = graph->fromInterpreted(structure, resource);
         if (status != TNN_OK) {
-            LOGE("%s", status.description().c_str());
+            LOGD("%s", status.description().c_str());
             return TNN_OK;
         }
diff --git a/source/tnn/utils/dims_function_utils.cc b/source/tnn/utils/dims_function_utils.cc
index dbc7954bc..f6bbe3ed2 100644
--- a/source/tnn/utils/dims_function_utils.cc
+++ b/source/tnn/utils/dims_function_utils.cc
@@ -51,6 +51,13 @@ DimsVector DimsFunctionUtils::Expand(DimsVector dims0, DimsVector dims1, Status
 DimsVector DimsFunctionUtils::Upsample(const DimsVector input_dims,
                                        std::vector<float> scales, std::vector<int> sizes, int mode, Status *status) {
+    if (input_dims.size() == 0) {
+        if (status) {
+            *status = Status(TNNERR_PARAM_ERR, "unsupport upsample input dims");
+        }
+        return DimsVector();
+    }
+
     int num      = input_dims[0];
     int channels = input_dims[1];
     int height   = input_dims[2];
@@ -108,6 +115,13 @@ DimsVector DimsFunctionUtils::Reshape(const DimsVector input_dims, const DimsVec
     int output_size = shape.size() + axis;
     DimsVector output_dims(output_size, 1);
 
+    if (input_dims.size() == 0) {
+        if (status) {
+            *status = Status(TNNERR_LAYER_ERR, "reshape input_dims size error");
+        }
+        return DimsVector();
+    }
+
     for(int i = 0; i < axis; ++i) {
         output_dims[i] = input_dims[i];
     }
diff --git a/test/unit_test/layer_test/test_padv2_layer.cc b/test/unit_test/layer_test/test_padv2_layer.cc
index a8b8a8769..02d1edb45 100644
--- a/test/unit_test/layer_test/test_padv2_layer.cc
+++ b/test/unit_test/layer_test/test_padv2_layer.cc
@@ -34,7 +34,7 @@ INSTANTIATE_TEST_SUITE_P(LayerTest, PadV2LayerTest,
                             // pad_type
                             testing::Values(0, 1),
                             // dim size
-                            testing::Values(3, 4, 5),
+                            testing::Values(3, 4, 5, 6),
                             // pad value
                             testing::Values(-FLT_MAX, 0, 2, FLT_MAX)));
@@ -67,8 +67,8 @@ TEST_P(PadV2LayerTest, PadV2Layer) {
     if (!(DEVICE_CUDA == dev || DEVICE_ARM == dev || DEVICE_OPENCL == dev || DEVICE_METAL == dev)) {
         GTEST_SKIP();
     }
-    // arm only support dims size 4
-    if (DEVICE_ARM == dev && dim_count != 4) {
+    // arm only support dims size 4 and 6
+    if (DEVICE_ARM == dev && (dim_count != 4 && dim_count != 6)) {
         GTEST_SKIP();
     }
     // opnecl only support dims size 4
@@ -81,6 +81,20 @@ TEST_P(PadV2LayerTest, PadV2Layer) {
         GTEST_SKIP();
     }
 
+    if (dim_count == 6) {
+        // only arm support dims size 6
+        if (DEVICE_ARM != dev) {
+            GTEST_SKIP();
+        }
+        if (pad_type != 0) {
+            GTEST_SKIP();
+        }
+        // reduce test time
+        if (input_size >= 10) {
+            GTEST_SKIP();
+        }
+    }
+
     // param
     std::shared_ptr<PadLayerParam> param(new PadLayerParam());
     param->name = "PadV2";
@@ -93,6 +107,8 @@ TEST_P(PadV2LayerTest, PadV2Layer) {
         param->pads = {0, pad_c, pad_h, pad_w, 0, pad_c, pad_h, pad_w};
     } else if (dim_count == 5) {
         param->pads = {0, pad_c, pad_h, pad_w, pad_w, 0, pad_c, pad_h, pad_w, pad_w};
+    } else if (dim_count == 6) {
+        param->pads = {0, 0, 0, pad_h, 0, pad_w, 0, 0, 0, pad_h, 0, pad_w};
     }
     param->value = value;
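The 6-D test case builds its pads as {0, 0, 0, pad_h, 0, pad_w, 0, 0, 0, pad_h, 0, pad_w}, i.e. it only pads dims 3 and 5, which matches the restriction enforced by CommonPadDimSixImpl. A small standalone sanity check of the implied shape rule, written against googletest with made-up sizes (not part of the TNN test suite):

    #include <gtest/gtest.h>
    #include <vector>

    TEST(PadV2Dim6Sketch, OutputDims) {
        const std::vector<int> in_dims = {1, 8, 4, 3, 4, 3};
        // pad_h = 1, pad_w = 2: begins for dims 0..5, then ends for dims 0..5
        const std::vector<int> pads    = {0, 0, 0, 1, 0, 2, 0, 0, 0, 1, 0, 2};
        std::vector<int> out_dims(6);
        for (int i = 0; i < 6; ++i) {
            out_dims[i] = in_dims[i] + pads[i] + pads[i + 6];
        }
        EXPECT_EQ(out_dims, (std::vector<int>{1, 8, 4, 5, 4, 7}));
    }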