1,150 changes: 654 additions & 496 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll

Large diffs are not rendered by default.

523 changes: 299 additions & 224 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll

Large diffs are not rendered by default.

1,068 changes: 613 additions & 455 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll

Large diffs are not rendered by default.

@@ -15,16 +15,16 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vadd.vv v11, v9, v9
; CHECK-NEXT: vrgather.vv v9, v10, v11
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vrgather.vv v9, v10, v11
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vadd.vi v12, v11, -16
; CHECK-NEXT: li a0, -256
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vadd.vi v12, v11, -16
; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
; CHECK-NEXT: vmsne.vi v9, v9, 0
; CHECK-NEXT: vadd.vi v12, v11, 1
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -326,9 +326,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: andi a1, a1, 255
; RV32-NEXT: li a2, 128
; RV32-NEXT: addi a3, a0, 128
; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT: addi a2, a0, 128
; RV32-NEXT: vle8.v v16, (a2)
; RV32-NEXT: vle8.v v16, (a3)
; RV32-NEXT: vle8.v v24, (a0)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: add a1, a0, a1
@@ -357,9 +357,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: andi a1, a1, 255
; RV64-NEXT: li a2, 128
; RV64-NEXT: addi a3, a0, 128
; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT: addi a2, a0, 128
; RV64-NEXT: vle8.v v16, (a2)
; RV64-NEXT: vle8.v v16, (a3)
; RV64-NEXT: vle8.v v24, (a0)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: add a1, a0, a1
@@ -388,9 +388,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV32ZBS-NEXT: andi sp, sp, -128
; RV32ZBS-NEXT: andi a1, a1, 255
; RV32ZBS-NEXT: li a2, 128
; RV32ZBS-NEXT: addi a3, a0, 128
; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV32ZBS-NEXT: addi a2, a0, 128
; RV32ZBS-NEXT: vle8.v v16, (a2)
; RV32ZBS-NEXT: vle8.v v16, (a3)
; RV32ZBS-NEXT: vle8.v v24, (a0)
; RV32ZBS-NEXT: mv a0, sp
; RV32ZBS-NEXT: add a1, a0, a1
@@ -419,9 +419,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
; RV64ZBS-NEXT: andi sp, sp, -128
; RV64ZBS-NEXT: andi a1, a1, 255
; RV64ZBS-NEXT: li a2, 128
; RV64ZBS-NEXT: addi a3, a0, 128
; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; RV64ZBS-NEXT: addi a2, a0, 128
; RV64ZBS-NEXT: vle8.v v16, (a2)
; RV64ZBS-NEXT: vle8.v v16, (a3)
; RV64ZBS-NEXT: vle8.v v24, (a0)
; RV64ZBS-NEXT: mv a0, sp
; RV64ZBS-NEXT: add a1, a0, a1
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -721,9 +721,9 @@ define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-NEXT: andi a1, a1, 63
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: li a2, 32
; RV32-NEXT: addi a3, a0, 128
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: addi a2, a0, 128
; RV32-NEXT: vle32.v v8, (a2)
; RV32-NEXT: vle32.v v8, (a3)
; RV32-NEXT: vle32.v v16, (a0)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: add a1, a0, a1
@@ -749,9 +749,9 @@ define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV64-NEXT: andi a1, a1, 63
; RV64-NEXT: slli a1, a1, 2
; RV64-NEXT: li a2, 32
; RV64-NEXT: addi a3, a0, 128
; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT: addi a2, a0, 128
; RV64-NEXT: vle32.v v8, (a2)
; RV64-NEXT: vle32.v v8, (a3)
; RV64-NEXT: vle32.v v16, (a0)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: add a1, a0, a1
94 changes: 42 additions & 52 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -204,8 +204,8 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
; ZVFHMIN-NEXT: fsrmi a0, 2
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
; ZVFH-NEXT: fsrmi a0, 2
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vmv1r.v v0, v10
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: fsrm a0
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -290,8 +290,8 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
; ZVFHMIN-NEXT: fsrmi a0, 2
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: fsrm a0
; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -439,8 +439,8 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -483,8 +483,8 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
@@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
@@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
@@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a1, 2
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfabs.v v8, v16, v0.t
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t
; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
31 changes: 15 additions & 16 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
@@ -177,8 +177,8 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t
@@ -253,8 +253,8 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t
@@ -608,7 +608,6 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
@@ -618,28 +617,28 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB24_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB24_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: csrr a0, vlenb
@@ -666,13 +665,13 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
@@ -759,9 +758,9 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
31 changes: 15 additions & 16 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
@@ -177,8 +177,8 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i
; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v10
; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t
@@ -253,8 +253,8 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1>
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v12
; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t
@@ -608,7 +608,6 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
@@ -618,28 +617,28 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB24_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB24_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: csrr a0, vlenb
@@ -666,13 +665,13 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
@@ -759,9 +758,9 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
@@ -19,9 +19,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x half> %r
@@ -42,9 +42,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x half> %r
@@ -65,9 +65,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x half> %r
@@ -88,9 +88,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x half> %r
@@ -112,9 +112,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <32 x half> %r
@@ -135,9 +135,9 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x float> %r
@@ -158,9 +158,9 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x float> %r
@@ -181,9 +181,9 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x float> %r
@@ -204,9 +204,9 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x float> %r
@@ -227,9 +227,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x double> %r
@@ -250,9 +250,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %r
@@ -273,9 +273,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x double> %r
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -39,9 +39,10 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
; CHECK-NEXT: vmul.vx v14, v12, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vadd.vi v8, v14, -14
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vadd.vi v8, v14, -14
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v12, v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v12
@@ -1407,8 +1408,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1459,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -56,9 +56,9 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
@@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
@@ -261,13 +261,13 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; V128-NEXT: vwmaccu.vx v8, a0, v16
; V128-NEXT: lui a1, 699051
; V128-NEXT: addi a1, a1, -1366
; V128-NEXT: li a2, 32
; V128-NEXT: vmv.s.x v0, a1
; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; V128-NEXT: li a1, 32
; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; V128-NEXT: vmerge.vvm v24, v8, v24, v0
; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; V128-NEXT: addi a1, sp, 16
; V128-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; V128-NEXT: vwaddu.vv v0, v16, v8
; V128-NEXT: vwmaccu.vx v0, a0, v8
; V128-NEXT: vmv8r.v v8, v0
21 changes: 10 additions & 11 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -93,12 +93,11 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
@@ -109,13 +108,13 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vlse64.v v10, (a0), zero
; CHECK-NEXT: vrsub.vi v12, v12, 4
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vrsub.vi v12, v12, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
@@ -129,12 +128,12 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0)
; CHECK-NEXT: vlse64.v v10, (a0), zero
; CHECK-NEXT: li a0, 3
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI8_0)
; CHECK-NEXT: vlse64.v v10, (a1), zero
; CHECK-NEXT: vmul.vx v12, v12, a0
; CHECK-NEXT: vmv.v.i v0, 3
; CHECK-NEXT: vmul.vx v12, v12, a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1199,7 +1199,7 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; ZVFH-NEXT: vsetivli zero, 3, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a1)
; ZVFH-NEXT: vle16.v v9, (a0)
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
130 changes: 60 additions & 70 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -351,25 +351,23 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa3, v8
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: feq.d a2, fa3, fa3
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a3, fa3, rtz
; RV32-NEXT: fld fa3, 40(sp)
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: feq.d a2, fa2, fa2
; RV32-NEXT: fmax.d fa3, fa2, fa5
; RV32-NEXT: vslide1down.vx v8, v10, a0
; RV32-NEXT: neg a0, a2
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: feq.d a2, fa3, fa3
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a3, fa3, rtz
; RV32-NEXT: fld fa3, 32(sp)
; RV32-NEXT: vslide1down.vx v8, v10, a0
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: neg a0, a2
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: feq.d a2, fa3, fa3
@@ -395,8 +393,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
@@ -452,25 +450,23 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vfmv.f.s fa3, v8
; RV64-NEXT: feq.d a0, fa3, fa3
; RV64-NEXT: feq.d a2, fa3, fa3
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a3, fa3, rtz
; RV64-NEXT: fld fa3, 40(sp)
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
; RV64-NEXT: neg a0, a0
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: feq.d a2, fa2, fa2
; RV64-NEXT: fmax.d fa3, fa2, fa5
; RV64-NEXT: vslide1down.vx v8, v10, a0
; RV64-NEXT: neg a0, a2
; RV64-NEXT: and a0, a0, a3
; RV64-NEXT: feq.d a2, fa3, fa3
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a3, fa3, rtz
; RV64-NEXT: fld fa3, 32(sp)
; RV64-NEXT: vslide1down.vx v8, v10, a0
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: neg a0, a2
; RV64-NEXT: and a0, a0, a3
; RV64-NEXT: feq.d a2, fa3, fa3
@@ -496,8 +492,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
@@ -542,46 +538,43 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a2, fa4, rtz
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vfmv.f.s fa4, v12
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vfmv.f.s fa4, v10
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: fcvt.wu.d a3, fa4, rtz
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
; RV32-NEXT: fld fa4, 32(sp)
; RV32-NEXT: fmax.d fa2, fa2, fa3
; RV32-NEXT: fmin.d fa2, fa2, fa5
; RV32-NEXT: fcvt.wu.d a2, fa2, rtz
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fld fa2, 48(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a3, fa4, rtz
; RV32-NEXT: vslide1down.vx v8, v10, a0
; RV32-NEXT: fcvt.wu.d a4, fa4, rtz
; RV32-NEXT: fmax.d fa4, fa2, fa3
; RV32-NEXT: fld fa2, 32(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a5, fa4, rtz
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: fmax.d fa4, fa2, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a2, fa4, rtz
; RV32-NEXT: fld fa4, 48(sp)
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vslide1down.vx v8, v8, a4
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
; RV32-NEXT: fld fa4, 56(sp)
; RV32-NEXT: vmv.v.x v9, a3
; RV32-NEXT: vslide1down.vx v9, v9, a2
; RV32-NEXT: vmv.v.x v9, a2
; RV32-NEXT: vslide1down.vx v9, v9, a5
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
@@ -618,46 +611,43 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a2, fa4, rtz
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.x v10, a2
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vfmv.f.s fa4, v12
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vfmv.f.s fa4, v10
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: fcvt.lu.d a3, fa4, rtz
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
; RV64-NEXT: fld fa4, 32(sp)
; RV64-NEXT: fmax.d fa2, fa2, fa3
; RV64-NEXT: fmin.d fa2, fa2, fa5
; RV64-NEXT: fcvt.lu.d a2, fa2, rtz
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fld fa2, 48(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a3, fa4, rtz
; RV64-NEXT: vslide1down.vx v8, v10, a0
; RV64-NEXT: fcvt.lu.d a4, fa4, rtz
; RV64-NEXT: fmax.d fa4, fa2, fa3
; RV64-NEXT: fld fa2, 32(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a5, fa4, rtz
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: fmax.d fa4, fa2, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a2, fa4, rtz
; RV64-NEXT: fld fa4, 48(sp)
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a3
; RV64-NEXT: vslide1down.vx v8, v8, a4
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
; RV64-NEXT: fld fa4, 56(sp)
; RV64-NEXT: vmv.v.x v9, a3
; RV64-NEXT: vslide1down.vx v9, v9, a2
; RV64-NEXT: vmv.v.x v9, a2
; RV64-NEXT: vslide1down.vx v9, v9, a5
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
@@ -96,8 +96,8 @@ declare <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float>, <32 x i1>, i32)
define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfpext_v32f32_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB7_2
@@ -112,8 +112,8 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 16
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: ret
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,8 @@ declare <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfptosi_v32i64_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB25_2
@@ -408,8 +408,8 @@ define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 ze
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t
; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
@@ -394,8 +394,8 @@ declare <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfptoui_v32i64_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB25_2
@@ -408,8 +408,8 @@ define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 ze
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t
; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
@@ -98,8 +98,8 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32
; CHECK-LABEL: vfptrunc_v32f32_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v12, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB7_2
@@ -112,8 +112,8 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
80 changes: 40 additions & 40 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -132,12 +132,12 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) {
define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
; ZVFH32-LABEL: si2fp_v3i7_v3f32:
; ZVFH32: # %bb.0:
; ZVFH32-NEXT: lw a1, 4(a0)
; ZVFH32-NEXT: lw a2, 0(a0)
; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH32-NEXT: lw a1, 0(a0)
; ZVFH32-NEXT: lw a2, 4(a0)
; ZVFH32-NEXT: lw a0, 8(a0)
; ZVFH32-NEXT: vmv.v.x v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a1
; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH32-NEXT: vmv.v.x v8, a1
; ZVFH32-NEXT: vslide1down.vx v8, v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a0
; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
; ZVFH32-NEXT: vadd.vv v8, v8, v8
@@ -149,12 +149,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFH64-LABEL: si2fp_v3i7_v3f32:
; ZVFH64: # %bb.0:
; ZVFH64-NEXT: ld a1, 8(a0)
; ZVFH64-NEXT: ld a2, 0(a0)
; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH64-NEXT: ld a1, 0(a0)
; ZVFH64-NEXT: ld a2, 8(a0)
; ZVFH64-NEXT: ld a0, 16(a0)
; ZVFH64-NEXT: vmv.v.x v8, a2
; ZVFH64-NEXT: vslide1down.vx v8, v8, a1
; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH64-NEXT: vmv.v.x v8, a1
; ZVFH64-NEXT: vslide1down.vx v8, v8, a2
; ZVFH64-NEXT: vslide1down.vx v8, v8, a0
; ZVFH64-NEXT: vslidedown.vi v8, v8, 1
; ZVFH64-NEXT: vadd.vv v8, v8, v8
@@ -166,12 +166,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN32-LABEL: si2fp_v3i7_v3f32:
; ZVFHMIN32: # %bb.0:
; ZVFHMIN32-NEXT: lw a1, 4(a0)
; ZVFHMIN32-NEXT: lw a2, 0(a0)
; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: lw a1, 0(a0)
; ZVFHMIN32-NEXT: lw a2, 4(a0)
; ZVFHMIN32-NEXT: lw a0, 8(a0)
; ZVFHMIN32-NEXT: vmv.v.x v8, a2
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: vmv.v.x v8, a1
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN32-NEXT: vadd.vv v8, v8, v8
@@ -183,12 +183,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN64-LABEL: si2fp_v3i7_v3f32:
; ZVFHMIN64: # %bb.0:
; ZVFHMIN64-NEXT: ld a1, 8(a0)
; ZVFHMIN64-NEXT: ld a2, 0(a0)
; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: ld a1, 0(a0)
; ZVFHMIN64-NEXT: ld a2, 8(a0)
; ZVFHMIN64-NEXT: ld a0, 16(a0)
; ZVFHMIN64-NEXT: vmv.v.x v8, a2
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: vmv.v.x v8, a1
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN64-NEXT: vadd.vv v8, v8, v8
@@ -205,12 +205,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
; ZVFH32-LABEL: ui2fp_v3i7_v3f32:
; ZVFH32: # %bb.0:
; ZVFH32-NEXT: lw a1, 4(a0)
; ZVFH32-NEXT: lw a2, 0(a0)
; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH32-NEXT: lw a1, 0(a0)
; ZVFH32-NEXT: lw a2, 4(a0)
; ZVFH32-NEXT: lw a0, 8(a0)
; ZVFH32-NEXT: vmv.v.x v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a1
; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH32-NEXT: vmv.v.x v8, a1
; ZVFH32-NEXT: vslide1down.vx v8, v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a0
; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
; ZVFH32-NEXT: li a0, 127
@@ -222,12 +222,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFH64-LABEL: ui2fp_v3i7_v3f32:
; ZVFH64: # %bb.0:
; ZVFH64-NEXT: ld a1, 8(a0)
; ZVFH64-NEXT: ld a2, 0(a0)
; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH64-NEXT: ld a1, 0(a0)
; ZVFH64-NEXT: ld a2, 8(a0)
; ZVFH64-NEXT: ld a0, 16(a0)
; ZVFH64-NEXT: vmv.v.x v8, a2
; ZVFH64-NEXT: vslide1down.vx v8, v8, a1
; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFH64-NEXT: vmv.v.x v8, a1
; ZVFH64-NEXT: vslide1down.vx v8, v8, a2
; ZVFH64-NEXT: vslide1down.vx v8, v8, a0
; ZVFH64-NEXT: vslidedown.vi v8, v8, 1
; ZVFH64-NEXT: li a0, 127
@@ -239,12 +239,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN32-LABEL: ui2fp_v3i7_v3f32:
; ZVFHMIN32: # %bb.0:
; ZVFHMIN32-NEXT: lw a1, 4(a0)
; ZVFHMIN32-NEXT: lw a2, 0(a0)
; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: lw a1, 0(a0)
; ZVFHMIN32-NEXT: lw a2, 4(a0)
; ZVFHMIN32-NEXT: lw a0, 8(a0)
; ZVFHMIN32-NEXT: vmv.v.x v8, a2
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: vmv.v.x v8, a1
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN32-NEXT: li a0, 127
@@ -256,12 +256,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN64-LABEL: ui2fp_v3i7_v3f32:
; ZVFHMIN64: # %bb.0:
; ZVFHMIN64-NEXT: ld a1, 8(a0)
; ZVFHMIN64-NEXT: ld a2, 0(a0)
; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: ld a1, 0(a0)
; ZVFHMIN64-NEXT: ld a2, 8(a0)
; ZVFHMIN64-NEXT: ld a0, 16(a0)
; ZVFHMIN64-NEXT: vmv.v.x v8, a2
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: vmv.v.x v8, a1
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN64-NEXT: li a0, 127
19 changes: 10 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -277,14 +277,14 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
; VLA-LABEL: insert_v8i32_v2i32_2:
; VLA: # %bb.0:
; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLA-NEXT: vle32.v v8, (a1)
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT: vle32.v v10, (a0)
; VLA-NEXT: vle32.v v8, (a0)
; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLA-NEXT: vle32.v v10, (a1)
; VLA-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; VLA-NEXT: vslideup.vi v10, v8, 2
; VLA-NEXT: vslideup.vi v8, v10, 2
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT: vse32.v v10, (a0)
; VLA-NEXT: vse32.v v8, (a0)
; VLA-NEXT: ret
;
; VLS-LABEL: insert_v8i32_v2i32_2:
@@ -306,12 +306,13 @@ define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) {
define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) {
; VLA-LABEL: insert_v8i32_v2i32_6:
; VLA: # %bb.0:
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT: vle32.v v8, (a0)
; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLA-NEXT: vle32.v v8, (a1)
; VLA-NEXT: vle32.v v10, (a1)
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT: vle32.v v10, (a0)
; VLA-NEXT: vslideup.vi v10, v8, 6
; VLA-NEXT: vse32.v v10, (a0)
; VLA-NEXT: vslideup.vi v8, v10, 6
; VLA-NEXT: vse32.v v8, (a0)
; VLA-NEXT: ret
;
; VLS-LABEL: insert_v8i32_v2i32_6:
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -533,11 +533,11 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a2, 6
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: li a1, 6
; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vmv.s.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
177 changes: 89 additions & 88 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -669,13 +669,14 @@ define void @buildvec_seq_v9i8(ptr %x) {
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: li a1, 146
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.s.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v9, 2, v0
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
@@ -1183,42 +1184,42 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; CHECK-LABEL: buildvec_v16i8_loads_contigous:
; CHECK: # %bb.0:
; CHECK-NEXT: lbu a1, 1(a0)
; CHECK-NEXT: lbu a2, 2(a0)
; CHECK-NEXT: lbu a3, 3(a0)
; CHECK-NEXT: lbu a4, 4(a0)
; CHECK-NEXT: lbu a5, 5(a0)
; CHECK-NEXT: lbu a6, 6(a0)
; CHECK-NEXT: lbu a7, 7(a0)
; CHECK-NEXT: lbu t0, 9(a0)
; CHECK-NEXT: lbu t1, 10(a0)
; CHECK-NEXT: lbu t2, 11(a0)
; CHECK-NEXT: lbu t3, 12(a0)
; CHECK-NEXT: lbu t4, 13(a0)
; CHECK-NEXT: lbu t5, 14(a0)
; CHECK-NEXT: lbu t6, 15(a0)
; CHECK-NEXT: addi a1, a0, 8
; CHECK-NEXT: lbu a2, 1(a0)
; CHECK-NEXT: lbu a3, 2(a0)
; CHECK-NEXT: lbu a4, 3(a0)
; CHECK-NEXT: lbu a5, 4(a0)
; CHECK-NEXT: lbu a6, 5(a0)
; CHECK-NEXT: lbu a7, 6(a0)
; CHECK-NEXT: lbu t0, 7(a0)
; CHECK-NEXT: lbu t1, 9(a0)
; CHECK-NEXT: lbu t2, 10(a0)
; CHECK-NEXT: lbu t3, 11(a0)
; CHECK-NEXT: lbu t4, 12(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), zero
; CHECK-NEXT: addi a0, a0, 8
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: lbu t5, 13(a0)
; CHECK-NEXT: lbu t6, 14(a0)
; CHECK-NEXT: lbu a0, 15(a0)
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, a4
; CHECK-NEXT: vlse8.v v9, (a0), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a5
; CHECK-NEXT: vlse8.v v9, (a1), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a6
; CHECK-NEXT: vslide1down.vx v10, v8, a7
; CHECK-NEXT: vslide1down.vx v8, v9, t0
; CHECK-NEXT: vslide1down.vx v8, v8, t1
; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslide1down.vx v10, v8, t0
; CHECK-NEXT: vslide1down.vx v8, v9, t1
; CHECK-NEXT: vslide1down.vx v8, v8, t2
; CHECK-NEXT: vslide1down.vx v8, v8, t3
; CHECK-NEXT: vslide1down.vx v8, v8, t4
; CHECK-NEXT: vslide1down.vx v8, v8, t5
; CHECK-NEXT: vslide1down.vx v8, v8, t6
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: li a1, 255
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
; CHECK-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
@@ -1277,42 +1278,42 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; CHECK-LABEL: buildvec_v16i8_loads_gather:
; CHECK: # %bb.0:
; CHECK-NEXT: lbu a1, 1(a0)
; CHECK-NEXT: lbu a2, 22(a0)
; CHECK-NEXT: lbu a3, 31(a0)
; CHECK-NEXT: lbu a4, 44(a0)
; CHECK-NEXT: lbu a5, 55(a0)
; CHECK-NEXT: lbu a6, 623(a0)
; CHECK-NEXT: lbu a7, 75(a0)
; CHECK-NEXT: lbu t0, 93(a0)
; CHECK-NEXT: lbu t1, 105(a0)
; CHECK-NEXT: lbu t2, 161(a0)
; CHECK-NEXT: lbu t3, 124(a0)
; CHECK-NEXT: lbu t4, 163(a0)
; CHECK-NEXT: lbu t5, 144(a0)
; CHECK-NEXT: lbu t6, 154(a0)
; CHECK-NEXT: addi a1, a0, 82
; CHECK-NEXT: lbu a2, 1(a0)
; CHECK-NEXT: lbu a3, 22(a0)
; CHECK-NEXT: lbu a4, 31(a0)
; CHECK-NEXT: lbu a5, 44(a0)
; CHECK-NEXT: lbu a6, 55(a0)
; CHECK-NEXT: lbu a7, 623(a0)
; CHECK-NEXT: lbu t0, 75(a0)
; CHECK-NEXT: lbu t1, 93(a0)
; CHECK-NEXT: lbu t2, 105(a0)
; CHECK-NEXT: lbu t3, 161(a0)
; CHECK-NEXT: lbu t4, 124(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), zero
; CHECK-NEXT: addi a0, a0, 82
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: lbu t5, 163(a0)
; CHECK-NEXT: lbu t6, 144(a0)
; CHECK-NEXT: lbu a0, 154(a0)
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, a4
; CHECK-NEXT: vlse8.v v9, (a0), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a5
; CHECK-NEXT: vlse8.v v9, (a1), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a6
; CHECK-NEXT: vslide1down.vx v10, v8, a7
; CHECK-NEXT: vslide1down.vx v8, v9, t0
; CHECK-NEXT: vslide1down.vx v8, v8, t1
; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslide1down.vx v10, v8, t0
; CHECK-NEXT: vslide1down.vx v8, v9, t1
; CHECK-NEXT: vslide1down.vx v8, v8, t2
; CHECK-NEXT: vslide1down.vx v8, v8, t3
; CHECK-NEXT: vslide1down.vx v8, v8, t4
; CHECK-NEXT: vslide1down.vx v8, v8, t5
; CHECK-NEXT: vslide1down.vx v8, v8, t6
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: li a1, 255
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
; CHECK-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
@@ -1375,17 +1376,17 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
; CHECK-NEXT: lbu a3, 105(a0)
; CHECK-NEXT: lbu a4, 161(a0)
; CHECK-NEXT: lbu a5, 124(a0)
; CHECK-NEXT: lbu a6, 163(a0)
; CHECK-NEXT: lbu a7, 144(a0)
; CHECK-NEXT: lbu a0, 154(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a1), zero
; CHECK-NEXT: lbu a1, 163(a0)
; CHECK-NEXT: lbu a6, 144(a0)
; CHECK-NEXT: lbu a0, 154(a0)
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, a4
; CHECK-NEXT: vslide1down.vx v8, v8, a5
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a6
; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: ret
%p9 = getelementptr i8, ptr %p, i32 82
@@ -1424,18 +1425,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; CHECK-NEXT: lbu a2, 22(a0)
; CHECK-NEXT: lbu a3, 31(a0)
; CHECK-NEXT: lbu a4, 44(a0)
; CHECK-NEXT: lbu a5, 55(a0)
; CHECK-NEXT: lbu a6, 623(a0)
; CHECK-NEXT: lbu a7, 75(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), zero
; CHECK-NEXT: lbu a5, 55(a0)
; CHECK-NEXT: lbu a6, 623(a0)
; CHECK-NEXT: lbu a0, 75(a0)
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, a4
; CHECK-NEXT: vslide1down.vx v8, v8, a5
; CHECK-NEXT: vslide1down.vx v8, v8, a6
; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslidedown.vi v8, v8, 8
; CHECK-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
@@ -1470,29 +1471,29 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; CHECK-LABEL: buildvec_v16i8_undef_edges:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, 31
; CHECK-NEXT: lbu a2, 44(a0)
; CHECK-NEXT: lbu a3, 55(a0)
; CHECK-NEXT: lbu a4, 623(a0)
; CHECK-NEXT: lbu a5, 75(a0)
; CHECK-NEXT: lbu a6, 93(a0)
; CHECK-NEXT: lbu a7, 105(a0)
; CHECK-NEXT: lbu t0, 161(a0)
; CHECK-NEXT: addi a2, a0, 82
; CHECK-NEXT: lbu a3, 44(a0)
; CHECK-NEXT: lbu a4, 55(a0)
; CHECK-NEXT: lbu a5, 623(a0)
; CHECK-NEXT: lbu a6, 75(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a1), zero
; CHECK-NEXT: addi a0, a0, 82
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vlse8.v v9, (a0), zero
; CHECK-NEXT: lbu a1, 93(a0)
; CHECK-NEXT: lbu a7, 105(a0)
; CHECK-NEXT: lbu a0, 161(a0)
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vlse8.v v9, (a2), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a4
; CHECK-NEXT: vslide1down.vx v10, v8, a5
; CHECK-NEXT: vslide1down.vx v8, v9, a6
; CHECK-NEXT: vslide1down.vx v8, v8, a5
; CHECK-NEXT: vslide1down.vx v10, v8, a6
; CHECK-NEXT: vslide1down.vx v8, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslide1down.vx v8, v8, t0
; CHECK-NEXT: vslidedown.vi v8, v8, 4
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 4
; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
; CHECK-NEXT: ret
%p4 = getelementptr i8, ptr %p, i32 31
@@ -1530,34 +1531,34 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; CHECK-LABEL: buildvec_v16i8_loads_undef_scattered:
; CHECK: # %bb.0:
; CHECK-NEXT: lbu a1, 1(a0)
; CHECK-NEXT: lbu a2, 44(a0)
; CHECK-NEXT: lbu a3, 55(a0)
; CHECK-NEXT: lbu a4, 75(a0)
; CHECK-NEXT: lbu a5, 93(a0)
; CHECK-NEXT: lbu a6, 124(a0)
; CHECK-NEXT: lbu a7, 144(a0)
; CHECK-NEXT: lbu t0, 154(a0)
; CHECK-NEXT: addi a1, a0, 82
; CHECK-NEXT: lbu a2, 1(a0)
; CHECK-NEXT: lbu a3, 44(a0)
; CHECK-NEXT: lbu a4, 55(a0)
; CHECK-NEXT: lbu a5, 75(a0)
; CHECK-NEXT: lbu a6, 93(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), zero
; CHECK-NEXT: addi a0, a0, 82
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: lbu a7, 124(a0)
; CHECK-NEXT: lbu t0, 144(a0)
; CHECK-NEXT: lbu a0, 154(a0)
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vlse8.v v9, (a0), zero
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vlse8.v v9, (a1), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a4
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vslide1down.vx v10, v8, a4
; CHECK-NEXT: vslide1down.vx v8, v9, a5
; CHECK-NEXT: vslide1down.vx v10, v8, a5
; CHECK-NEXT: vslide1down.vx v8, v9, a6
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vslide1down.vx v8, v8, a6
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vslide1down.vx v8, v8, t0
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: li a1, 255
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
; CHECK-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
181 changes: 88 additions & 93 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll
@@ -828,112 +828,104 @@ define i64 @explode_8xi64(<8 x i64> %v) {
define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -64
; RV32-NEXT: .cfi_def_cfa_offset 64
; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: .cfi_offset s2, -16
; RV32-NEXT: .cfi_offset s3, -20
; RV32-NEXT: .cfi_offset s4, -24
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
; RV32-NEXT: .cfi_offset s7, -36
; RV32-NEXT: .cfi_offset s8, -40
; RV32-NEXT: .cfi_offset s9, -44
; RV32-NEXT: .cfi_offset s10, -48
; RV32-NEXT: .cfi_offset s11, -52
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset s0, -4
; RV32-NEXT: .cfi_offset s1, -8
; RV32-NEXT: .cfi_offset s2, -12
; RV32-NEXT: .cfi_offset s3, -16
; RV32-NEXT: .cfi_offset s4, -20
; RV32-NEXT: .cfi_offset s5, -24
; RV32-NEXT: .cfi_offset s6, -28
; RV32-NEXT: .cfi_offset s7, -32
; RV32-NEXT: .cfi_offset s8, -36
; RV32-NEXT: .cfi_offset s9, -40
; RV32-NEXT: .cfi_offset s10, -44
; RV32-NEXT: .cfi_offset s11, -48
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT: vslidedown.vi v16, v8, 2
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s a0, v24
; RV32-NEXT: vmv.x.s a1, v16
; RV32-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: vslidedown.vi v16, v8, 3
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a1, v24
; RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: vmv.x.s a2, v16
; RV32-NEXT: vslidedown.vi v16, v8, 3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a3, v24
; RV32-NEXT: vmv.x.s a4, v16
; RV32-NEXT: vslidedown.vi v16, v8, 4
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a5, v24
; RV32-NEXT: vmv.x.s a6, v16
; RV32-NEXT: vslidedown.vi v16, v8, 5
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a7, v24
; RV32-NEXT: vmv.x.s t0, v16
; RV32-NEXT: vslidedown.vi v16, v8, 6
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s t1, v24
; RV32-NEXT: vmv.x.s t2, v16
; RV32-NEXT: vslidedown.vi v16, v8, 7
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s t3, v24
; RV32-NEXT: vmv.x.s t4, v16
; RV32-NEXT: vslidedown.vi v16, v8, 8
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s t5, v24
; RV32-NEXT: vmv.x.s t6, v16
; RV32-NEXT: vslidedown.vi v16, v8, 9
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s0, v24
; RV32-NEXT: vmv.x.s s1, v16
; RV32-NEXT: vslidedown.vi v16, v8, 10
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s2, v24
; RV32-NEXT: vmv.x.s s3, v16
; RV32-NEXT: vslidedown.vi v16, v8, 11
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s4, v24
; RV32-NEXT: vmv.x.s s5, v16
; RV32-NEXT: vslidedown.vi v16, v8, 12
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s6, v24
; RV32-NEXT: vmv.x.s s7, v16
; RV32-NEXT: vslidedown.vi v16, v8, 13
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s8, v24
; RV32-NEXT: vmv.x.s s9, v16
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s9, v24
; RV32-NEXT: vmv.x.s s8, v16
; RV32-NEXT: vslidedown.vi v16, v8, 14
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s s10, v24
; RV32-NEXT: vmv.x.s s11, v16
; RV32-NEXT: vslidedown.vi v16, v8, 15
; RV32-NEXT: vsrl.vx v24, v16, a3
; RV32-NEXT: vmv.x.s ra, v24
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vmv.x.s a2, v16
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.s.x v17, zero
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vredxor.vs v17, v8, v17
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 15
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a3
; RV32-NEXT: vmv.x.s a3, v9
; RV32-NEXT: add a3, a3, a0
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: sltu a1, a0, a1
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: lw a3, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: add a4, a0, a4
; RV32-NEXT: sltu a0, a4, a0
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: vsrl.vx v18, v17, a0
; RV32-NEXT: vmv.x.s s10, v18
; RV32-NEXT: vmv.x.s s11, v17
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v0, v8, a0
; RV32-NEXT: add a1, s10, a1
; RV32-NEXT: add a2, s11, a2
; RV32-NEXT: sltu a0, a2, s11
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: add a0, a0, a3
; RV32-NEXT: add a4, a2, a4
; RV32-NEXT: sltu a1, a4, a2
; RV32-NEXT: add a1, a1, a5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a6, a4, a6
; RV32-NEXT: sltu a1, a6, a4
; RV32-NEXT: add a1, a1, a7
@@ -968,33 +960,36 @@ define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s7, s5, s7
; RV32-NEXT: sltu a1, s7, s5
; RV32-NEXT: add a1, a1, s8
; RV32-NEXT: add a1, a1, s9
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s9, s7, s9
; RV32-NEXT: sltu a1, s9, s7
; RV32-NEXT: add a1, a1, s10
; RV32-NEXT: vmv.x.s a1, v24
; RV32-NEXT: add s8, s7, s8
; RV32-NEXT: sltu a2, s8, s7
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: vmv.x.s a2, v16
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s11, s9, s11
; RV32-NEXT: sltu a1, s11, s9
; RV32-NEXT: add a1, a1, ra
; RV32-NEXT: vmv.x.s a1, v0
; RV32-NEXT: add a2, s8, a2
; RV32-NEXT: sltu a3, a2, s8
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: add a0, s11, a2
; RV32-NEXT: sltu a2, a0, s11
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a2, a0, a2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: explode_16xi64:
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -69,9 +69,9 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
@@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
@@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; V128-NEXT: vid.v v8
; V128-NEXT: vsrl.vi v8, v8, 1
; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
; V128-NEXT: vmv.v.v v8, v10
; V128-NEXT: ret
@@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
; V512-NEXT: vid.v v8
; V512-NEXT: vsrl.vi v8, v8, 1
; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
@@ -426,13 +426,13 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; V128-NEXT: vwmaccu.vx v8, a0, v16
; V128-NEXT: lui a1, 699051
; V128-NEXT: addi a1, a1, -1366
; V128-NEXT: li a2, 32
; V128-NEXT: vmv.s.x v0, a1
; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; V128-NEXT: li a1, 32
; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; V128-NEXT: vmerge.vvm v24, v8, v24, v0
; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; V128-NEXT: addi a1, sp, 16
; V128-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; V128-NEXT: vwaddu.vv v0, v16, v8
; V128-NEXT: vwmaccu.vx v0, a0, v8
; V128-NEXT: vmv8r.v v8, v0
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -89,8 +89,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v11, (a0)
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -162,22 +162,21 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v16, 2
; RV32-NEXT: lui a0, %hi(.LCPI11_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0)
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 5
; RV32-NEXT: lui a1, %hi(.LCPI11_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI11_0)
; RV32-NEXT: vle16.v v20, (a1)
; RV32-NEXT: vslide1down.vx v21, v16, a0
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT: vrgatherei16.vv v16, v8, v20
; RV32-NEXT: li a0, 164
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT: vrgatherei16.vv v16, v8, v20
; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_vv_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vmv4r.v v16, v8
; RV64-NEXT: lui a0, 327683
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: addi a0, a0, 1
@@ -186,17 +185,18 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.x v20, a0
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vrgatherei16.vv v8, v16, v20
; RV64-NEXT: vrgatherei16.vv v16, v8, v20
; RV64-NEXT: li a0, 164
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: lui a0, 163841
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: addi a0, a0, 1
; RV64-NEXT: slli a0, a0, 17
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.x v16, a0
; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgatherei16.vv v8, v12, v16, v0.t
; RV64-NEXT: vrgatherei16.vv v16, v12, v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> <i32 1, i32 2, i32 10, i32 5, i32 1, i32 10, i32 3, i32 13>
ret <8 x i64> %s
@@ -210,13 +210,13 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: vmv.v.i v20, -1
; RV32-NEXT: vrgatherei16.vv v12, v20, v16
; RV32-NEXT: lui a0, %hi(.LCPI12_1)
; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1)
; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: vle16.v v17, (a0)
; RV32-NEXT: li a0, 113
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
; RV32-NEXT: vrgatherei16.vv v12, v20, v16
; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -367,10 +367,10 @@ define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -386,10 +386,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: li a0, 4
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT: vmv.s.x v11, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -402,10 +402,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: li a0, 67
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
@@ -421,10 +421,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: addi a0, a0, 514
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v11, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -440,10 +440,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v11, v10, 2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: li a0, 70
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -463,10 +463,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: addi a0, a0, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: li a0, 98
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -666,8 +666,8 @@ define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_slidedown:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: li a0, 195
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
@@ -680,10 +680,10 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
@@ -694,13 +694,13 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w
define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>