Skip to content

Commit

Permalink
neondot qs8 rsum use const for remainder masking
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 635010053
  • Loading branch information
fbarchard authored and xnnpack-bot committed May 18, 2024
1 parent 22ff33b commit 73ea8d1
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 15 deletions.
4 changes: 2 additions & 2 deletions src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u16.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u16(
vacc0 = vdotq_s32(vacc0, vt, vone);
}
if (XNN_UNLIKELY(batch != 0)) {
int8x16_t vt = vld1q_s8(input);
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
const int8x16_t vt = vld1q_s8(input);
const int8x16_t vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
vacc0 = vdotq_s32(vacc0, vt, vone);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u32-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u32_acc2(
vacc0 = vdotq_s32(vacc0, vt, vone);
}
if (XNN_UNLIKELY(batch != 0)) {
int8x16_t vt = vld1q_s8(input);
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
const int8x16_t vt = vld1q_s8(input);
const int8x16_t vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
vacc0 = vdotq_s32(vacc0, vt, vone);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u32.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u32(
vacc0 = vdotq_s32(vacc0, vt, vone);
}
if (XNN_UNLIKELY(batch != 0)) {
int8x16_t vt = vld1q_s8(input);
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
const int8x16_t vt = vld1q_s8(input);
const int8x16_t vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
vacc0 = vdotq_s32(vacc0, vt, vone);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u64-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u64_acc2(
vacc0 = vdotq_s32(vacc0, vt, vone);
}
if (XNN_UNLIKELY(batch != 0)) {
int8x16_t vt = vld1q_s8(input);
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
const int8x16_t vt = vld1q_s8(input);
const int8x16_t vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
vacc0 = vdotq_s32(vacc0, vt, vone);
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u64-acc4.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u64_acc4(
vacc0 = vdotq_s32(vacc0, vt, vone);
}
if (XNN_UNLIKELY(batch != 0)) {
int8x16_t vt = vld1q_s8(input);
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
const int8x16_t vt = vld1q_s8(input);
const int8x16_t vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
vacc0 = vdotq_s32(vacc0, vt, vone);
}
}
vacc0 = vaddq_s32(vacc0, vacc1);
vacc2 = vaddq_s32(vacc2, vacc3);
vacc0 = vaddq_s32(vacc0, vacc2);
vacc0 = vaddq_s32(vacc0, vacc2);
int32x2_t vacc_lo = vadd_s32(vget_low_s32(vacc0), vget_high_s32(vacc0));
vacc_lo = vpadd_s32(vacc_lo, vacc_lo);

Expand Down
4 changes: 2 additions & 2 deletions src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u64.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u64(
vacc0 = vdotq_s32(vacc0, vt, vone);
}
if (XNN_UNLIKELY(batch != 0)) {
int8x16_t vt = vld1q_s8(input);
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
const int8x16_t vt = vld1q_s8(input);
const int8x16_t vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
vacc0 = vdotq_s32(vacc0, vt, vone);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/qs8-rsum/neondot.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ void xnn_qs8_rsum_minmax_${REQUANTIZATION.lower()}_ukernel__neondot_u${CHANNEL_T
vacc0 = vdotq_s32(vacc0, vt, vone);
}
if (XNN_UNLIKELY(batch != 0)) {
int8x16_t vt = vld1q_s8(input);
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
const int8x16_t vt = vld1q_s8(input);
const int8x16_t vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
vacc0 = vdotq_s32(vacc0, vt, vone);
}
}
Expand Down

0 comments on commit 73ea8d1

Please sign in to comment.