Skip to content

Commit

Permalink
RVV: replace word_type with size_t (#4100, #4118)
Browse files · Browse the repository at this point in the history
  • Loading branch information
thelastlin committed Oct 1, 2022
1 parent c7262aa commit d80731a
Showing 1 changed file with 12 additions and 12 deletions.
24 changes: 12 additions & 12 deletions src/layer/riscv/instancenorm_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ int InstanceNorm_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt)
float* ptr_sum = ptr;
while (n > 0)
{
word_type vl = vsetvl_e32m8(n);
size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vle32_v_f32m8(ptr_sum, vl);
_sum = vfredusum_vs_f32m8_f32m1(_sum, _p, /* scalar */ _sum, vl);
// _sqsum = vfredosum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
Expand All @@ -99,7 +99,7 @@ int InstanceNorm_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt)
float* ptr_sqsum = ptr;
while (n > 0)
{
word_type vl = vsetvl_e32m8(n);
size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vle32_v_f32m8(ptr_sqsum, vl);
_p = vfsub_vf_f32m8(_p, mean, vl);
_sqsum = vfredosum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
Expand Down Expand Up @@ -141,7 +141,7 @@ int InstanceNorm_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt)
float* ptr_store = ptr;
while (n > 0)
{
word_type vl = vsetvl_e32m8(n);
size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vle32_v_f32m8(ptr_store, vl);
_p = vfmul_vf_f32m8(_p, a, vl);
_p = vfadd_vf_f32m8(_p, b, vl);
Expand All @@ -164,7 +164,7 @@ int InstanceNorm_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt)
const int packn = csrr_vlenb() / 4;
if (elempack == packn)
{
const word_type vl = vsetvl_e32m1(packn);
const size_t vl = vsetvl_e32m1(packn);
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < c; q++)
{
Expand Down Expand Up @@ -248,7 +248,7 @@ int InstanceNorm_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option
__fp16* ptr_sum = ptr;
while (n > 0)
{
word_type vl = vsetvl_e32m8(n);
size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vfwcvt_f_f_v_f32m8(vle16_v_f16m4(ptr_sum, vl), vl);
_sum = vfredusum_vs_f32m8_f32m1(_sum, _p, /* scalar */ _sum, vl);
// _sqsum = vfredosum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
Expand All @@ -263,7 +263,7 @@ int InstanceNorm_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option
__fp16* ptr_sqsum = ptr;
while (n > 0)
{
word_type vl = vsetvl_e32m8(n);
size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vfwcvt_f_f_v_f32m8(vle16_v_f16m4(ptr_sqsum, vl), vl);
_p = vfsub_vf_f32m8(_p, mean, vl);
_sqsum = vfredosum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
Expand Down Expand Up @@ -296,7 +296,7 @@ int InstanceNorm_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option
__fp16* ptr_store = ptr;
while (n > 0)
{
word_type vl = vsetvl_e32m8(n);
size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vfwcvt_f_f_v_f32m8(vle16_v_f16m4(ptr_store, vl), vl);
_p = vfmul_vf_f32m8(_p, a, vl);
_p = vfadd_vf_f32m8(_p, b, vl);
Expand All @@ -312,7 +312,7 @@ int InstanceNorm_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option
const int packn = csrr_vlenb() / 2;
if (elempack == packn)
{
const word_type vl = vsetvl_e16m1(packn);
const size_t vl = vsetvl_e16m1(packn);
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < c; q++)
{
Expand Down Expand Up @@ -393,7 +393,7 @@ int InstanceNorm_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Optio
__fp16* ptr_sum = ptr;
while (n > 0)
{
word_type vl = vsetvl_e16m8(n);
size_t vl = vsetvl_e16m8(n);
vfloat16m8_t _p = vle16_v_f16m8(ptr_sum, vl);
_sum = vfredusum_vs_f16m8_f16m1(_sum, _p, /* scalar */ _sum, vl);
// _sqsum = vfredosum_vs_f16m8_f16m1(_sqsum, vfmul_vv_f16m8(_p, _p, vl), /* scalar */ _sqsum, vl);
Expand All @@ -408,7 +408,7 @@ int InstanceNorm_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Optio
__fp16* ptr_sqsum = ptr;
while (n > 0)
{
word_type vl = vsetvl_e16m8(n);
size_t vl = vsetvl_e16m8(n);
vfloat16m8_t _p = vle16_v_f16m8(ptr_sqsum, vl);
_p = vfsub_vf_f16m8(_p, mean, vl);
_sqsum = vfredosum_vs_f16m8_f16m1(_sqsum, vfmul_vv_f16m8(_p, _p, vl), /* scalar */ _sqsum, vl);
Expand Down Expand Up @@ -441,7 +441,7 @@ int InstanceNorm_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Optio
__fp16* ptr_store = ptr;
while (n > 0)
{
word_type vl = vsetvl_e32m8(n);
size_t vl = vsetvl_e32m8(n);
vfloat16m8_t _p = vle16_v_f16m8(ptr_store, vl);
_p = vfmul_vf_f16m8(_p, a, vl);
_p = vfadd_vf_f16m8(_p, b, vl);
Expand All @@ -457,7 +457,7 @@ int InstanceNorm_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Optio
const int packn = csrr_vlenb() / 2;
if (elempack == packn)
{
const word_type vl = vsetvl_e16m1(packn);
const size_t vl = vsetvl_e16m1(packn);
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < c; q++)
{
Expand Down

0 comments on commit d80731a

Please sign in to comment.