Skip to content

Commit

Permalink
[CPU] Fix issue in reducing HW with small channel size in nspc layout (
Browse files Browse the repository at this point in the history
  • Loading branch information
xuchen-intel committed Mar 22, 2023
1 parent 9010045 commit 57c91e0
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 17 deletions.
36 changes: 19 additions & 17 deletions src/plugins/intel_cpu/src/nodes/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2147,22 +2147,24 @@ void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) {
} else if (!ReduceC && ReduceD && ReduceH && !ReduceW) {
size_t IWB = IW / blk_size;
if (ReduceDH_opt) {
// reduce parallelly in D dimension
// step1: !ReduceD && ReduceH && !ReduceW
uint8_t *prc_ptr_n = &vec_reduceDH_prc[0];
init_dst_data(prc_ptr_n, prc_size);
parallel_for2d(ID, IWB, [&](size_t id, size_t iwb){
size_t pd = id, pwb = iwb;
reduce_kernel_process(in_ptr_n + (id * IH * IW + iwb * blk_size) * src_data_size,
prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH);
});
// step2: ReduceD
reduce_stride = PW;
parallel_for(IWB, [&](size_t iwb){
size_t pwb = iwb, owb = iwb;
reduce_kernel_process(prc_ptr_n + pwb * blk_size * prc_data_size,
out_ptr_n + owb * blk_size * dst_data_size, blk_size, 0, ID);
});
if (IWB > 0) {
// reduce parallelly in D dimension
// step1: !ReduceD && ReduceH && !ReduceW
uint8_t *prc_ptr_n = &vec_reduceDH_prc[0];
init_dst_data(prc_ptr_n, prc_size);
parallel_for2d(ID, IWB, [&](size_t id, size_t iwb){
size_t pd = id, pwb = iwb;
reduce_kernel_process(in_ptr_n + (id * IH * IW + iwb * blk_size) * src_data_size,
prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH);
});
// step2: ReduceD
reduce_stride = PW;
parallel_for(IWB, [&](size_t iwb){
size_t pwb = iwb, owb = iwb;
reduce_kernel_process(prc_ptr_n + pwb * blk_size * prc_data_size,
out_ptr_n + owb * blk_size * dst_data_size, blk_size, 0, ID);
});
}
// reduce tail
reduce_stride = IW;
size_t tail_start = IWB * blk_size;
Expand Down Expand Up @@ -2740,7 +2742,7 @@ inline void Reduce::set_reduce_dim_flags() {
ReduceH = IH != OH && OH == 1;
ReduceW = IW != OW && OW == 1;

// must be done before the above dimension change
// must be done after the above dimension change
create_DH_working_memory();

// suit for parallel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,10 @@ const std::vector<std::vector<int>> axes5DFusing = {
{0, 2, 4},
};

const std::vector<std::vector<int>> axesHW = {
{2, 3}
};

std::vector<CommonTestUtils::OpType> opTypes = {
CommonTestUtils::OpType::SCALAR,
CommonTestUtils::OpType::VECTOR,
Expand Down Expand Up @@ -294,6 +298,11 @@ std::vector<std::vector<ov::test::InputShape>> inputShapes_Int32 = {
{{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 3}}}},
};

std::vector<std::vector<ov::test::InputShape>> inputShapes_SmallChannel = {
{{{}, {{2, 3, 2, 9}}}},
{{{{1, 5}, 3, {1, 5}, {1, 10}}, {{2, 3, 2, 2}, {2, 3, 2, 9}}}},
};

std::vector<CPUSpecificParams> cpuParams_4D = {
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}),
CPUSpecificParams({nchw}, {nchw}, {}, {}),
Expand All @@ -316,6 +325,10 @@ std::vector<CPUSpecificParams> cpuParams_HybridLayout_5D = {
CPUSpecificParams({ndhwc}, {}, {}, {})
};

std::vector<CPUSpecificParams> cpuParams_NHWC_4D = {
CPUSpecificParams({nhwc}, {nhwc}, {}, {})
};

const std::vector<fusingSpecificParams> fusingParamsSet {
/* activations */
fusingSwish,
Expand Down Expand Up @@ -431,6 +444,19 @@ const auto params_Int32 = testing::Combine(
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));

const auto params_NHWC_SmallChannel = testing::Combine(
testing::Combine(
testing::ValuesIn(axesHW),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes),
testing::ValuesIn(inpOutPrc),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_SmallChannel)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
testing::Values(emptyFusingSpec));

INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_CPU,
ReduceCPULayerTest,
Expand Down Expand Up @@ -480,6 +506,13 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);

INSTANTIATE_TEST_SUITE_P(
smoke_Reducea_NHWC_SmallChannel_CPU,
ReduceCPULayerTest,
params_NHWC_SmallChannel,
ReduceCPULayerTest::getTestCaseName
);

/* ================================ 1.2 No fusion - Logical ================================ */
const auto params_OneAxis_Logical = testing::Combine(
testing::Combine(
Expand Down

0 comments on commit 57c91e0

Please sign in to comment.