[ISel/RISCV] Fix fixed-vector [l]lrint lowering #145898
Conversation
Make the fixed-vector lowering of ISD::[L]LRINT use the custom-lowering routine, lowerVectorXRINT, and fix issues in lowerVectorXRINT related to this new functionality.
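The effect is easiest to see in the updated fixed-vector tests: a small [l]lrint call that previously scalarized (libcalls to llrintf on RV32, fcvt.l.s on RV64) now lowers to a single widening vector convert. A condensed excerpt, taken from fixed-vectors-llrint.ll in the diff below:

```llvm
; Condensed from llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll in this patch.
define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
  ret <2 x i64> %a
}
; New lowering (identical on RV32 and RV64):
;   vsetivli zero, 2, e32, mf2, ta, ma
;   vfwcvt.x.f.v v9, v8
;   vmv1r.v v8, v9
;   ret
```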
@llvm/pr-subscribers-backend-risc-v

Author: Ramkumar Ramachandra (artagnon)

Changes: Make the fixed-vector lowering of ISD::[L]LRINT use the custom-lowering routine, lowerVectorXRINT, and fix issues in lowerVectorXRINT related to this new functionality.

Patch is 79.98 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/145898.diff

3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 13ee3ee63d1a6..71d62c7342328 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1499,7 +1499,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
+ ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
+ ISD::LLRINT, ISD::FNEARBYINT},
VT, Custom);
setCondCodeAction(VFPCCToExpand, VT, Expand);
@@ -3202,7 +3203,14 @@ static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
case ISD::VP_FROUND:
return RISCVFPRndMode::RMM;
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
case ISD::VP_FRINT:
+ case ISD::VP_LRINT:
+ case ISD::VP_LLRINT:
return RISCVFPRndMode::DYN;
}
@@ -3452,28 +3460,34 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
// Expand vector LRINT and LLRINT by converting to the integer domain.
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- MVT VT = Op.getSimpleValueType();
- assert(VT.isVector() && "Unexpected type");
-
SDLoc DL(Op);
+ MVT DstVT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
- MVT ContainerVT = VT;
+ MVT SrcVT = Src.getSimpleValueType();
+ assert(SrcVT.isVector() && DstVT.isVector() &&
+ !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
+ "Unexpected type");
- if (VT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
- Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ MVT DstContainerVT = DstVT;
+ MVT SrcContainerVT = SrcVT;
+
+ if (DstVT.isFixedLengthVector()) {
+ DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
+ SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
}
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- SDValue Truncated = DAG.getNode(
- RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
- DAG.getTargetConstant(RISCVFPRndMode::DYN, DL, Subtarget.getXLenVT()),
- VL);
+ auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+ SDValue Res =
+ DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
+ DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
+ Subtarget.getXLenVT()),
+ VL);
- if (!VT.isFixedLengthVector())
- return Truncated;
+ if (!DstVT.isFixedLengthVector())
+ return Res;
- return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
+ return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
}
static SDValue
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index c1252902ede69..0a6f9f5ba0928 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -7,31 +7,16 @@
define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; RV32-LABEL: llrint_v1i64_v1f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v1i64_v1f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
ret <1 x i64> %a
@@ -41,60 +26,16 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; RV32-LABEL: llrint_v2i64_v2f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vl1r.v v8, (a2) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vfwcvt.x.f.v v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v2i64_v2f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
@@ -104,106 +45,16 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
; RV32-LABEL: llrint_v3i64_v3f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v3i64_v3f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v11
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv1r.v v10, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v10
; RV64-NEXT: ret
%a = call <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float> %x)
ret <3 x i64> %a
@@ -213,106 +64,16 @@ declare <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float>)
define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; RV32-LABEL: llrint_v4i64_v4f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: addi a2, sp, 16
-; RV32-NEXT: vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a1, a0, 1
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: .cfi_def_cfa sp, 32
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vfwcvt.x.f.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: llrint_v4i64_v4f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: vslidedown.vi v11, v8, 3
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a1, fa5
-; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT: vfmv.f.s fa5, v11
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv1r.v v10, v8
+; RV64-NEXT: vfwcvt.x.f.v v8, v10
; RV64-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
ret <4 x i64> %a
@@ -322,149 +83,16 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; RV32-LABEL: llrint_v8i64_v8f32:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -208
-; RV32-NEXT: .cfi_def_cfa_offset 208
-; RV32-NEXT: sw ra, 204(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 200(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 208
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 64(sp)
-; RV32-NEXT: sw a1, 68(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 120(sp)
-; RV32-NEXT: sw a1, 124(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 6
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 112(sp)
-; RV32-NEXT: sw a1, 116(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 5
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 104(sp)
-; RV32-NEXT: sw a1, 108(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 4
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 96(sp)
-; RV32-NEXT: sw a1, 100(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 88(sp)
-; RV32-NEXT: sw a1, 92(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 80(sp)
-; RV32-NEXT: sw a1, 84(sp)
-; RV32-NEXT: addi a0, sp, 192
-; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vfmv.f.s fa0, v8
-; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a0, 72(sp)
-; RV32-NEXT: sw a1, 76(sp)
-; RV32-NEXT: addi a0, sp, 64
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: addi sp, s0, -208
-; RV32-NEXT: .cfi_def_cfa sp, 208
-; RV32-NEXT: lw ra, 204(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 200(sp) # 4-byte Folded Reload
-; RV32-NEXT: .cfi_restore ra
-; RV32-NEXT: .cfi_restore s0
-; RV32-NEXT: addi sp, sp, 208
-; RV32-NEXT: .cfi_def_cf...
[truncated]
LGTM
case ISD::STRICT_FRINT:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
Just checking: we don't have tests for the constrained intrinsics? Not that we need to handle them in this PR, just making a note.
We don't have custom-lowering or tests for strict/vp versions afaik.
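For the record, coverage for the constrained variants would look roughly like the following. This is a hypothetical sketch, not part of this PR; it assumes the standard llvm.experimental.constrained.llrint intrinsic with its usual type mangling and the strictfp function attribute:

```llvm
; Hypothetical test for the constrained (strict-FP) variant; not part of this PR.
define <2 x i64> @llrint_strict_v2i64_v2f32(<2 x float> %x) strictfp {
  %a = call <2 x i64> @llvm.experimental.constrained.llrint.v2i64.v2f32(
           <2 x float> %x,
           metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <2 x i64> %a
}
```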
LLVM Buildbot has detected a new failure on one of its builders. Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/22476

Here is the relevant piece of the build log for reference: