Skip to content

Commit 73ea8d1

Browse files
fbarchard authored and xnnpack-bot committed
neondot qs8 rsum use const for remainder masking
PiperOrigin-RevId: 635010053
1 parent 22ff33b commit 73ea8d1

7 files changed

+15
-15
lines changed

src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u16.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u16(
3838
vacc0 = vdotq_s32(vacc0, vt, vone);
3939
}
4040
if (XNN_UNLIKELY(batch != 0)) {
41-
int8x16_t vt = vld1q_s8(input);
42-
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
41+
const int8x16_t vt = vld1q_s8(input);
42+
const vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
4343
vacc0 = vdotq_s32(vacc0, vt, vone);
4444
}
4545
}

src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u32-acc2.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u32_acc2(
4141
vacc0 = vdotq_s32(vacc0, vt, vone);
4242
}
4343
if (XNN_UNLIKELY(batch != 0)) {
44-
int8x16_t vt = vld1q_s8(input);
45-
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
44+
const int8x16_t vt = vld1q_s8(input);
45+
const vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
4646
vacc0 = vdotq_s32(vacc0, vt, vone);
4747
}
4848
}

src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u32.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u32(
4040
vacc0 = vdotq_s32(vacc0, vt, vone);
4141
}
4242
if (XNN_UNLIKELY(batch != 0)) {
43-
int8x16_t vt = vld1q_s8(input);
44-
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
43+
const int8x16_t vt = vld1q_s8(input);
44+
const vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
4545
vacc0 = vdotq_s32(vacc0, vt, vone);
4646
}
4747
}

src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u64-acc2.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u64_acc2(
4545
vacc0 = vdotq_s32(vacc0, vt, vone);
4646
}
4747
if (XNN_UNLIKELY(batch != 0)) {
48-
int8x16_t vt = vld1q_s8(input);
49-
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
48+
const int8x16_t vt = vld1q_s8(input);
49+
const vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
5050
vacc0 = vdotq_s32(vacc0, vt, vone);
5151
}
5252
}

src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u64-acc4.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,14 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u64_acc4(
4747
vacc0 = vdotq_s32(vacc0, vt, vone);
4848
}
4949
if (XNN_UNLIKELY(batch != 0)) {
50-
int8x16_t vt = vld1q_s8(input);
51-
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
50+
const int8x16_t vt = vld1q_s8(input);
51+
const vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
5252
vacc0 = vdotq_s32(vacc0, vt, vone);
5353
}
5454
}
5555
vacc0 = vaddq_s32(vacc0, vacc1);
56+
vacc2 = vaddq_s32(vacc2, vacc3);
5657
vacc0 = vaddq_s32(vacc0, vacc2);
57-
vacc0 = vaddq_s32(vacc0, vacc3);
5858
int32x2_t vacc_lo = vadd_s32(vget_low_s32(vacc0), vget_high_s32(vacc0));
5959
vacc_lo = vpadd_s32(vacc_lo, vacc_lo);
6060

src/qs8-rsum/gen/qs8-rsum-minmax-fp32-neondot-u64.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,8 @@ void xnn_qs8_rsum_minmax_fp32_ukernel__neondot_u64(
4444
vacc0 = vdotq_s32(vacc0, vt, vone);
4545
}
4646
if (XNN_UNLIKELY(batch != 0)) {
47-
int8x16_t vt = vld1q_s8(input);
48-
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
47+
const int8x16_t vt = vld1q_s8(input);
48+
const vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
4949
vacc0 = vdotq_s32(vacc0, vt, vone);
5050
}
5151
}

src/qs8-rsum/neondot.c.in

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,8 @@ void xnn_qs8_rsum_minmax_${REQUANTIZATION.lower()}_ukernel__neondot_u${CHANNEL_T
4545
vacc0 = vdotq_s32(vacc0, vt, vone);
4646
}
4747
if (XNN_UNLIKELY(batch != 0)) {
48-
int8x16_t vt = vld1q_s8(input);
49-
vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
48+
const int8x16_t vt = vld1q_s8(input);
49+
const vone = vld1q_s8(&params->fp32_neon.mask_table[15 - batch]);
5050
vacc0 = vdotq_s32(vacc0, vt, vone);
5151
}
5252
}

0 commit comments

Comments
 (0)