[RISCV] Set the exact flag on the SRL created for converting vscale to a read of vlenb. #144571
We know that vlenb is a multiple of RVVBytesPerBlock, so we aren't shifting out any non-zero bits.
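To make the arithmetic concrete, here is a minimal standalone sketch (plain C++, deliberately not LLVM code) of the identities the exact flag licenses: because vlenb is a multiple of RVVBytesPerBlock (8 bytes), shifting it right by 3 discards only zero bits, so the srli+slli pairs in the old test output can fold into a single srli.

```cpp
// Standalone sketch, assuming only that vlenb is a multiple of 8
// (RVVBytesPerBlock). Not LLVM code; it just checks the shift identities
// that an exact SRL allows DAGCombine to use.
#include <cassert>
#include <cstdint>

int main() {
  // vlenb = VLEN/8 for VLEN = 64, 128, 256, ... is always a multiple of 8.
  for (uint64_t vlenb = 8; vlenb <= 1024; vlenb *= 2) {
    uint64_t vscale = vlenb >> 3; // exact: no non-zero bits shifted out
    // srli a1,a0,3 ; slli a1,a1,1  ==>  srli a1,a0,2
    assert((vscale << 1) == (vlenb >> 2));
    // vscale*3 becomes (vlenb>>2) + (vlenb>>3) instead of shift+shift+add.
    assert(vscale * 3 == (vlenb >> 2) + (vlenb >> 3));
  }
  return 0;
}
```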
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Patch is 22.98 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/144571.diff

8 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 779786fa400fc..a6679d726622c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7372,8 +7372,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
DAG.getConstant(Val / 8, DL, XLenVT));
} else {
+ SDNodeFlags Flags;
+ Flags.setExact(true);
SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
- DAG.getConstant(3, DL, XLenVT));
+ DAG.getConstant(3, DL, XLenVT),
+ Flags);
Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
DAG.getConstant(Val, DL, XLenVT));
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 83637e4a71d45..d42c42c7ce036 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -290,8 +290,7 @@ define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_6(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: extract_nxv32i8_nxv2i8_6:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a0
@@ -314,8 +313,7 @@ define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_22(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: extract_nxv32i8_nxv2i8_22:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v10, a0
@@ -341,9 +339,9 @@ define <vscale x 1 x i8> @extract_nxv4i8_nxv1i8_3(<vscale x 4 x i8> %vec) {
; CHECK-LABEL: extract_nxv4i8_nxv1i8_3:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
index bd0fecd285515..aea688f03cf72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
@@ -257,9 +257,9 @@ define i32 @vector_length_vf3_i32(i32 zeroext %tc) {
; RV32-LABEL: vector_length_vf3_i32:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a1, a1, 3
-; RV32-NEXT: slli a2, a1, 1
-; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: srli a2, a1, 3
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: bltu a0, a1, .LBB22_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
@@ -270,9 +270,9 @@ define i32 @vector_length_vf3_i32(i32 zeroext %tc) {
; RV64: # %bb.0:
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a1, a1, 3
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: srli a2, a1, 3
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: bltu a0, a1, .LBB22_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
@@ -286,9 +286,9 @@ define i32 @vector_length_vf3_XLen(iXLen zeroext %tc) {
; RV32-LABEL: vector_length_vf3_XLen:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a1, a1, 3
-; RV32-NEXT: slli a2, a1, 1
-; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: srli a2, a1, 3
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: bltu a0, a1, .LBB23_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
@@ -299,9 +299,9 @@ define i32 @vector_length_vf3_XLen(iXLen zeroext %tc) {
; RV64: # %bb.0:
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a1, a1, 3
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: srli a2, a1, 3
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: bltu a0, a1, .LBB23_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
index e9e1303d10768..f847ccafefdaf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
@@ -8,9 +8,9 @@ define <vscale x 3 x i8> @load_nxv3i8(ptr %ptr) {
; CHECK-LABEL: load_nxv3i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
@@ -22,9 +22,9 @@ define <vscale x 5 x half> @load_nxv5f16(ptr %ptr) {
; CHECK-LABEL: load_nxv5f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: slli a2, a1, 2
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
index 77438ee53b634..03b84ec177ee9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
@@ -8,9 +8,9 @@ define void @store_nxv3i8(<vscale x 3 x i8> %val, ptr %ptr) {
; CHECK-LABEL: store_nxv3i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
index aef46e1f5cf1b..bfd7fc5d04cd6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
@@ -2300,10 +2300,9 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-RV64-NEXT: li a2, 0
; CHECK-RV64-NEXT: j .LBB98_5
; CHECK-RV64-NEXT: .LBB98_2: # %vector.ph
-; CHECK-RV64-NEXT: slli a2, a2, 2
-; CHECK-RV64-NEXT: negw a2, a2
-; CHECK-RV64-NEXT: andi a2, a2, 256
; CHECK-RV64-NEXT: srli a3, a4, 1
+; CHECK-RV64-NEXT: negw a2, a3
+; CHECK-RV64-NEXT: andi a2, a2, 256
; CHECK-RV64-NEXT: slli a4, a4, 1
; CHECK-RV64-NEXT: mv a5, a0
; CHECK-RV64-NEXT: mv a6, a2
@@ -2395,10 +2394,9 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-ZVKB-NOZBB64-NEXT: li a2, 0
; CHECK-ZVKB-NOZBB64-NEXT: j .LBB98_5
; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_2: # %vector.ph
-; CHECK-ZVKB-NOZBB64-NEXT: slli a2, a2, 2
-; CHECK-ZVKB-NOZBB64-NEXT: negw a2, a2
-; CHECK-ZVKB-NOZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-NOZBB64-NEXT: srli a3, a4, 1
+; CHECK-ZVKB-NOZBB64-NEXT: negw a2, a3
+; CHECK-ZVKB-NOZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-NOZBB64-NEXT: slli a4, a4, 1
; CHECK-ZVKB-NOZBB64-NEXT: mv a5, a0
; CHECK-ZVKB-NOZBB64-NEXT: mv a6, a2
@@ -2489,10 +2487,9 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-ZVKB-ZBB64-NEXT: li a2, 0
; CHECK-ZVKB-ZBB64-NEXT: j .LBB98_5
; CHECK-ZVKB-ZBB64-NEXT: .LBB98_2: # %vector.ph
-; CHECK-ZVKB-ZBB64-NEXT: slli a2, a2, 2
-; CHECK-ZVKB-ZBB64-NEXT: negw a2, a2
-; CHECK-ZVKB-ZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-ZBB64-NEXT: srli a3, a4, 1
+; CHECK-ZVKB-ZBB64-NEXT: negw a2, a3
+; CHECK-ZVKB-ZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-ZBB64-NEXT: slli a4, a4, 1
; CHECK-ZVKB-ZBB64-NEXT: mv a5, a0
; CHECK-ZVKB-ZBB64-NEXT: mv a6, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 6a08f5a28a295..75f92c86ff09f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -477,27 +477,26 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v8, v0
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: srli a1, a0, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a1
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
; CHECK-NEXT: vs8r.v v16, (a0)
; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -606,11 +605,9 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: srli a2, a0, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a2, a0, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
@@ -836,39 +833,37 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
-; CHECK-NEXT: srli a2, a0, 2
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: srli a3, a0, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: srli a2, a0, 1
+; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a3
-; CHECK-NEXT: srli a3, a0, 3
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: sub a0, a0, a3
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v9, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a0
-; CHECK-NEXT: vs8r.v v16, (a1)
+; CHECK-NEXT: vs8r.v v16, (a2)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a2
+; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
; CHECK-NEXT: vs8r.v v24, (a0)
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg6e8.v v16, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg6e8.v v16, (a2)
; CHECK-NEXT: vlseg6e8.v v10, (a0)
; CHECK-NEXT: vmv2r.v v8, v16
; CHECK-NEXT: vmv2r.v v22, v18
@@ -1068,36 +1063,35 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: srli a2, a0, 1
-; CHECK-NEXT: srli a3, a0, 3
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a2
-; CHECK-NEXT: sub a0, a0, a3
-; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v12, 1, v0
+; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v22, v12, 1, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v9, a0
+; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v20, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v9, a1
+; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v14, v12, 1, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: vmv1r.v v10, v15
-; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v24, v12, 1, v0
; CHECK-NEXT: vmv1r.v v11, v24
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a2
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a0
; CHECK-NEXT: vmv1r.v v8, v23
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v14
@@ -1339,49 +1333,48 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: srli a2, a0, 1
-; CHECK-NEXT: srli a3, a0, 3
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a2
-; CHECK-NEXT: sub a0, a0, a3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a0
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a1
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vslidedown.vx v0, v9, a1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a2
-; CHECK-NEXT: vs8r.v v16, (a1)
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v30, v10, 1, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a0
+; CHECK-NEXT: vs8r.v v16, (a2)
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v30, v10, 1, v0
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
; CHECK-NEXT: vs8r.v v24, (a0)
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg8e8.v v18, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg8e8.v v18, (a2)
; CHECK-NEXT: vlseg8e8.v v10, (a0)
; CHECK-NEXT: vmv2r.v v8, v18
; CHECK-NEXT: vmv2r.v v26, v20
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 3da04eb7e6abe..78aae96242fd3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -887,9 +887,9 @@ define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv3f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
@@ -906,8 +906,7 @@ define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
@@ -925,8 +924,7 @@ define half @vreduce_ord_fadd_nxv...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
We have target known bits set for READ_VLENB, why aren't we able to infer the exact flag on the shift from that? Are we missing some general combine?
DAGCombine basically never infers any flags on already existing nodes. Attempts are made to preserve or add flags during combines that produce new nodes.
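As a standalone illustration (plain C++, not the SelectionDAG API) of what the flag asserts and why the fold cannot be applied without it: (x >> 3) << 1 equals x >> 2 only when the right shift discards no set bits, which is exactly the guarantee that exact encodes. Known bits on READ_VLENB could prove that condition here, but per the comment above, DAGCombine does not go back and annotate nodes that already exist.

```cpp
// Hedged sketch: demonstrates the soundness condition behind the exact
// flag, using plain integers rather than SelectionDAG nodes.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t vlenb = 16;                         // multiple of 8: the srl is exact
  assert(((vlenb >> 3) << 1) == (vlenb >> 2)); // fold is sound
  uint64_t x = 12;                             // bit 2 set: the srl is not exact
  assert(((x >> 3) << 1) != (x >> 2));         // fold would change the value
  return 0;
}
```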
LGTM