Fix f16-vclamp RVV element count (shift batch by XNN_LOG2_SIZEOF_HALF, not XNN_LOG2_SIZEOF_FLOAT)

PiperOrigin-RevId: 634133132
google · May 16, 2024 · 9b338a6 · 9b338a6
1 parent 8d5f4e0
commit 9b338a6
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 15 deletions.
diff --git a/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c b/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c
@@ -12,7 +12,6 @@
 #include <riscv_vector.h>
 
 #include <xnnpack/common.h>
-#include <xnnpack/intrinsics-polyfill.h>
 #include <xnnpack/vunary.h>
 
 
@@ -23,7 +22,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u1v(
     const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
 {
   assert(batch != 0);
-  assert(batch % sizeof(float) == 0);
+  assert(batch % sizeof(_Float16) == 0);
   assert(input != NULL);
   assert(output != NULL);
 
@@ -33,7 +32,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u1v(
   const _Float16 vmin = params->fp16arith.min;
   const _Float16 vmax = params->fp16arith.max;
 
-  batch >>= XNN_LOG2_SIZEOF_FLOAT;
+  batch >>= XNN_LOG2_SIZEOF_HALF;
   do {
     const size_t n = __riscv_vsetvl_e16m1(batch);
     vfloat16m1_t vacc = __riscv_vle16_v_f16m1((const void*) i, n);

diff --git a/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c b/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c
@@ -12,7 +12,6 @@
 #include <riscv_vector.h>
 
 #include <xnnpack/common.h>
-#include <xnnpack/intrinsics-polyfill.h>
 #include <xnnpack/vunary.h>
 
 
@@ -23,7 +22,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u2v(
     const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
 {
   assert(batch != 0);
-  assert(batch % sizeof(float) == 0);
+  assert(batch % sizeof(_Float16) == 0);
   assert(input != NULL);
   assert(output != NULL);
 
@@ -33,7 +32,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u2v(
   const _Float16 vmin = params->fp16arith.min;
   const _Float16 vmax = params->fp16arith.max;
 
-  batch >>= XNN_LOG2_SIZEOF_FLOAT;
+  batch >>= XNN_LOG2_SIZEOF_HALF;
   do {
     const size_t n = __riscv_vsetvl_e16m2(batch);
     vfloat16m2_t vacc = __riscv_vle16_v_f16m2((const void*) i, n);

diff --git a/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c b/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c
@@ -12,7 +12,6 @@
 #include <riscv_vector.h>
 
 #include <xnnpack/common.h>
-#include <xnnpack/intrinsics-polyfill.h>
 #include <xnnpack/vunary.h>
 
 
@@ -23,7 +22,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u4v(
     const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
 {
   assert(batch != 0);
-  assert(batch % sizeof(float) == 0);
+  assert(batch % sizeof(_Float16) == 0);
   assert(input != NULL);
   assert(output != NULL);
 
@@ -33,7 +32,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u4v(
   const _Float16 vmin = params->fp16arith.min;
   const _Float16 vmax = params->fp16arith.max;
 
-  batch >>= XNN_LOG2_SIZEOF_FLOAT;
+  batch >>= XNN_LOG2_SIZEOF_HALF;
   do {
     const size_t n = __riscv_vsetvl_e16m4(batch);
     vfloat16m4_t vacc = __riscv_vle16_v_f16m4((const void*) i, n);

diff --git a/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c b/src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c
@@ -12,7 +12,6 @@
 #include <riscv_vector.h>
 
 #include <xnnpack/common.h>
-#include <xnnpack/intrinsics-polyfill.h>
 #include <xnnpack/vunary.h>
 
 
@@ -23,7 +22,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u8v(
     const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
 {
   assert(batch != 0);
-  assert(batch % sizeof(float) == 0);
+  assert(batch % sizeof(_Float16) == 0);
   assert(input != NULL);
   assert(output != NULL);
 
@@ -33,7 +32,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u8v(
   const _Float16 vmin = params->fp16arith.min;
   const _Float16 vmax = params->fp16arith.max;
 
-  batch >>= XNN_LOG2_SIZEOF_FLOAT;
+  batch >>= XNN_LOG2_SIZEOF_HALF;
   do {
     const size_t n = __riscv_vsetvl_e16m8(batch);
     vfloat16m8_t vacc = __riscv_vle16_v_f16m8((const void*) i, n);

diff --git a/src/f16-vclamp/rvvfp16arith.c.in b/src/f16-vclamp/rvvfp16arith.c.in
@@ -8,7 +8,6 @@
 #include <riscv_vector.h>
 
 #include <xnnpack/common.h>
-#include <xnnpack/intrinsics-polyfill.h>
 #include <xnnpack/vunary.h>
 
 
@@ -19,7 +18,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u${LMUL}v(
     const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
 {
   assert(batch != 0);
-  assert(batch % sizeof(float) == 0);
+  assert(batch % sizeof(_Float16) == 0);
   assert(input != NULL);
   assert(output != NULL);
 
@@ -29,7 +28,7 @@ void xnn_f16_vclamp_ukernel__rvvfp16arith_u${LMUL}v(
   const _Float16 vmin = params->fp16arith.min;
   const _Float16 vmax = params->fp16arith.max;
 
-  batch >>= XNN_LOG2_SIZEOF_FLOAT;
+  batch >>= XNN_LOG2_SIZEOF_HALF;
   do {
     const size_t n = __riscv_vsetvl_e16m${LMUL}(batch);
     vfloat16m${LMUL}_t vacc = __riscv_vle16_v_f16m${LMUL}((const void*) i, n);