Skip to content

Commit

Permalink
Update the im2col_fp32_3x3 stride-2 channel loop to reduce the instruction count
Browse files: browse the repository at this point in the history
  • Loading branch information
Conley Lee committed Jun 26, 2023
1 parent 75f4099 commit 55f26da
Showing 1 changed file with 6 additions and 17 deletions.
23 changes: 6 additions & 17 deletions source/device/cpu/op/conv/risc-v/lp64dv/im2col_fp32_3x3.S
Original file line number Diff line number Diff line change
Expand Up @@ -129,37 +129,26 @@ stride1_channel_loop:
j finish

stride2_channel_loop:
la t0, mask_32b
vle32.v v0, (t0)
addi t0, a0, 0
vlse32.v v16, (t0), t2
addi t0, a0, 0x4
vlse32.v v17, (t0), t2
addi t0, a0, 32
vle32.v v18, (t0)
vslidedown.vi v1, v16, 1
vslideup.vi v2, v18, 3
vmerge.vvm v18, v1, v2, v0
addi t0, a0, 0x8
vlse32.v v18, (t0), t2

addi t0, t5, 0
vlse32.v v19, (t0), t2
addi t0, t5, 0x4
vlse32.v v20, (t0), t2
addi t0, t5, 0x20
vle32.v v21, (t0)
vslidedown.vi v1, v19, 1
vslideup.vi v2, v21, 3
vmerge.vvm v21, v1, v2, v0
addi t0, t5, 0x8
vlse32.v v21, (t0), t2

addi t0, t6, 0
vlse32.v v22, (t0), t2
addi t0, t6, 0x4
vlse32.v v23, (t0), t2
addi t0, t6, 0x20
vle32.v v24, (t0)
vslidedown.vi v1, v22, 1
vslideup.vi v2, v24, 3
vmerge.vvm v24, v1, v2, v0
addi t0, t6, 0x8
vlse32.v v24, (t0), t2

addi a3, a3, -1

Expand Down

0 comments on commit 55f26da

Please sign in to comment.