[ISel/RISCV] Fix fixed-vector [l]lrint lowering #145898


Merged
merged 1 commit into llvm:main on Jun 30, 2025

Conversation

artagnon (Contributor)

Make the fixed-vector lowering of ISD::[L]LRINT use the custom-lowering routine, lowerVectorXRINT, and fix issues in lowerVectorXRINT related to this new functionality.

llvmbot (Member) commented on Jun 26, 2025:

@llvm/pr-subscribers-backend-risc-v

Author: Ramkumar Ramachandra (artagnon)

Changes

Make the fixed-vector lowering of ISD::[L]LRINT use the custom-lowering routine, lowerVectorXRINT, and fix issues in lowerVectorXRINT related to this new functionality.


Patch is 79.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145898.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+30-16)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll (+44-887)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll (+53-728)
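
To see the effect of the change, consider a minimal case mirrored from the updated fixed-vectors-llrint.ll test in the diff below: the fixed-vector llrint now lowers to a single widening vector conversion (vfwcvt.x.f.v, which rounds under the dynamic rounding mode) instead of extracting each lane and converting it through scalar code or a libcall.

; Mirrors llrint_v2i64_v2f32 from the test diff below (riscv32/riscv64 with +v).
define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
  ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)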
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 13ee3ee63d1a6..71d62c7342328 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1499,7 +1499,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                            VT, Custom);
 
         setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
-                            ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
+                            ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
+                            ISD::LLRINT, ISD::FNEARBYINT},
                            VT, Custom);
 
         setCondCodeAction(VFPCCToExpand, VT, Expand);
@@ -3202,7 +3203,14 @@ static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
   case ISD::VP_FROUND:
     return RISCVFPRndMode::RMM;
   case ISD::FRINT:
+  case ISD::LRINT:
+  case ISD::LLRINT:
+  case ISD::STRICT_FRINT:
+  case ISD::STRICT_LRINT:
+  case ISD::STRICT_LLRINT:
   case ISD::VP_FRINT:
+  case ISD::VP_LRINT:
+  case ISD::VP_LLRINT:
     return RISCVFPRndMode::DYN;
   }
 
@@ -3452,28 +3460,34 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
 // Expand vector LRINT and LLRINT by converting to the integer domain.
 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
-  MVT VT = Op.getSimpleValueType();
-  assert(VT.isVector() && "Unexpected type");
-
   SDLoc DL(Op);
+  MVT DstVT = Op.getSimpleValueType();
   SDValue Src = Op.getOperand(0);
-  MVT ContainerVT = VT;
+  MVT SrcVT = Src.getSimpleValueType();
+  assert(SrcVT.isVector() && DstVT.isVector() &&
+         !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
+         "Unexpected type");
 
-  if (VT.isFixedLengthVector()) {
-    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
-    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+  MVT DstContainerVT = DstVT;
+  MVT SrcContainerVT = SrcVT;
+
+  if (DstVT.isFixedLengthVector()) {
+    DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
+    SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
   }
 
-  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-  SDValue Truncated = DAG.getNode(
-      RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
-      DAG.getTargetConstant(RISCVFPRndMode::DYN, DL, Subtarget.getXLenVT()),
-      VL);
+  auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+  SDValue Res =
+      DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
+                  DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
+                                        Subtarget.getXLenVT()),
+                  VL);
 
-  if (!VT.isFixedLengthVector())
-    return Truncated;
+  if (!DstVT.isFixedLengthVector())
+    return Res;
 
-  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
+  return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
 }
 
 static SDValue
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index c1252902ede69..0a6f9f5ba0928 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -7,31 +7,16 @@
 define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; RV32-LABEL: llrint_v1i64_v1f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 0(sp)
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    mv a0, sp
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a0), zero
-; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    .cfi_restore ra
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v9, v8
+; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v1i64_v1f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vfmv.f.s fa5, v8
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a0
+; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v9, v8
+; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
   ret <1 x i64> %a
@@ -41,60 +26,16 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
 define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 ; RV32-LABEL: llrint_v2i64_v2f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 16
-; RV32-NEXT:    vl1r.v v8, (a2) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    .cfi_def_cfa sp, 32
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    .cfi_restore ra
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v9, v8
+; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v2i64_v2f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT:    vslidedown.vi v9, v8, 1
-; RV64-NEXT:    vfmv.f.s fa5, v8
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vfmv.f.s fa5, v9
-; RV64-NEXT:    fcvt.l.s a1, fa5
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v9, v8
+; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
   ret <2 x i64> %a
@@ -104,106 +45,16 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
 define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
 ; RV32-LABEL: llrint_v3i64_v3f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    .cfi_def_cfa sp, 32
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    .cfi_restore ra
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv1r.v v10, v8
+; RV32-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v3i64_v3f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v9, v8, 1
-; RV64-NEXT:    vfmv.f.s fa5, v8
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    vslidedown.vi v11, v8, 3
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vfmv.f.s fa5, v9
-; RV64-NEXT:    fcvt.l.s a1, fa5
-; RV64-NEXT:    vfmv.f.s fa5, v10
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT:    vfmv.f.s fa5, v11
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmv1r.v v10, v8
+; RV64-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float> %x)
   ret <3 x i64> %a
@@ -213,106 +64,16 @@ declare <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float>)
 define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 ; RV32-LABEL: llrint_v4i64_v4f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vl2r.v v8, (a2) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a1, a0, 1
-; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    .cfi_def_cfa sp, 32
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    .cfi_restore ra
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv1r.v v10, v8
+; RV32-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: llrint_v4i64_v4f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v9, v8, 1
-; RV64-NEXT:    vfmv.f.s fa5, v8
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    vslidedown.vi v11, v8, 3
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vfmv.f.s fa5, v9
-; RV64-NEXT:    fcvt.l.s a1, fa5
-; RV64-NEXT:    vfmv.f.s fa5, v10
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64-NEXT:    vfmv.f.s fa5, v11
-; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    fcvt.l.s a0, fa5
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmv1r.v v10, v8
+; RV64-NEXT:    vfwcvt.x.f.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
   ret <4 x i64> %a
@@ -322,149 +83,16 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
 define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 ; RV32-LABEL: llrint_v8i64_v8f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -208
-; RV32-NEXT:    .cfi_def_cfa_offset 208
-; RV32-NEXT:    sw ra, 204(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 200(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 208
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    sub sp, sp, a0
-; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 64(sp)
-; RV32-NEXT:    sw a1, 68(sp)
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 7
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 120(sp)
-; RV32-NEXT:    sw a1, 124(sp)
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 6
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 112(sp)
-; RV32-NEXT:    sw a1, 116(sp)
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 5
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 104(sp)
-; RV32-NEXT:    sw a1, 108(sp)
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 4
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 96(sp)
-; RV32-NEXT:    sw a1, 100(sp)
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 88(sp)
-; RV32-NEXT:    sw a1, 92(sp)
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 2
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 80(sp)
-; RV32-NEXT:    sw a1, 84(sp)
-; RV32-NEXT:    addi a0, sp, 192
-; RV32-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:    vfmv.f.s fa0, v8
-; RV32-NEXT:    call llrintf
-; RV32-NEXT:    sw a0, 72(sp)
-; RV32-NEXT:    sw a1, 76(sp)
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    addi sp, s0, -208
-; RV32-NEXT:    .cfi_def_cfa sp, 208
-; RV32-NEXT:    lw ra, 204(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 200(sp) # 4-byte Folded Reload
-; RV32-NEXT:    .cfi_restore ra
-; RV32-NEXT:    .cfi_restore s0
-; RV32-NEXT:    addi sp, sp, 208
-; RV32-NEXT:    .cfi_def_cf...
[truncated]
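
Two details of the RISCVISelLowering.cpp hunk above are worth spelling out. First, matchRoundingOp maps [L]LRINT to RISCVFPRndMode::DYN, so the emitted conversion honors the dynamic rounding mode in frm, matching the scalar semantics of lrint/llrint. Second, splitting the old single ContainerVT into SrcContainerVT and DstContainerVT is what lets the source and result element widths differ. A minimal sketch of the same-width case, modeled on the fixed-vectors-lrint.ll test (which conversion instruction gets selected depends on the element widths):

; f64 -> i64 keeps the element width, so the source and destination container
; types coincide and a plain single-width convert suffices; contrast the
; widening f32 -> i64 llrint case shown earlier.
define <2 x i64> @lrint_v2i64_v2f64(<2 x double> %x) {
  %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x)
  ret <2 x i64> %a
}
declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)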

artagnon requested a review from mshockwave on June 27, 2025.
lukel97 (Contributor) left a comment:


LGTM

Comment on lines +3208 to +3210
case ISD::STRICT_FRINT:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
lukel97 (Contributor) commented:

Just checking: we don't have tests for the constrained intrinsics? Not that we need to handle them in this PR; just making a note.

artagnon (Contributor, Author) replied:

We don't have custom lowering or tests for the strict/VP versions, as far as I know.
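
For reference, a VP form that matchRoundingOp now recognizes, but which (per the above) has no dedicated fixed-vector lowering or tests yet, would look roughly like this hypothetical sketch (assuming the usual mask-and-EVL signature of the llvm.vp.llrint intrinsic):

; Hypothetical test input; %m and %evl are the standard VP predication
; operands, not something this PR adds or exercises.
declare <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float>, <2 x i1>, i32)

define <2 x i64> @vp_llrint_v2i64_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) {
  %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %a
}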

artagnon merged commit 652630b into llvm:main on Jun 30, 2025.
9 checks passed
artagnon deleted the isel-rv-lrint-fixed-vector branch on June 30, 2025 at 12:44.
llvm-ci (Collaborator) commented on Jun 30, 2025:

LLVM Buildbot has detected a new failure on builder llvm-clang-aarch64-darwin running on doug-worker-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/22476

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
/Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli -jit-kind=orc-lazy -compile-threads=2 -thread-entry hello /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll # RUN: at line 1
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli -jit-kind=orc-lazy -compile-threads=2 -thread-entry hello /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
 #0 0x0000000101a7c144 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli+0x100efc144)
 #1 0x0000000101a7a204 llvm::sys::RunSignalHandlers() (/Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli+0x100efa204)
 #2 0x0000000101a7c83c SignalHandler(int, __siginfo*, void*) (/Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli+0x100efc83c)
 #3 0x000000019d937584 (/usr/lib/system/libsystem_platform.dylib+0x18047b584)
 #4 0x000000019d90621c (/usr/lib/system/libsystem_pthread.dylib+0x18044a21c)
 #5 0x000000019d82cad0 (/usr/lib/libc++.1.dylib+0x180370ad0)
 #6 0x000000010160acd0 void llvm::detail::UniqueFunctionBase<void, llvm::Expected<llvm::DenseMap<llvm::orc::SymbolStringPtr, llvm::orc::ExecutorSymbolDef, llvm::DenseMapInfo<llvm::orc::SymbolStringPtr, void>, llvm::detail::DenseMapPair<llvm::orc::SymbolStringPtr, llvm::orc::ExecutorSymbolDef>>>>::CallImpl<llvm::orc::Platform::lookupInitSymbols(llvm::orc::ExecutionSession&, llvm::DenseMap<llvm::orc::JITDylib*, llvm::orc::SymbolLookupSet, llvm::DenseMapInfo<llvm::orc::JITDylib*, void>, llvm::detail::DenseMapPair<llvm::orc::JITDylib*, llvm::orc::SymbolLookupSet>> const&)::$_45>(void*, llvm::Expected<llvm::DenseMap<llvm::orc::SymbolStringPtr, llvm::orc::ExecutorSymbolDef, llvm::DenseMapInfo<llvm::orc::SymbolStringPtr, void>, llvm::detail::DenseMapPair<llvm::orc::SymbolStringPtr, llvm::orc::ExecutorSymbolDef>>>&) (/Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli+0x100a8acd0)
 #7 0x0000000101606904 llvm::orc::AsynchronousSymbolQuery::handleComplete(llvm::orc::ExecutionSession&)::RunQueryCompleteTask::run() (/Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli+0x100a86904)
 #8 0x00000001016cdad0 void* std::__1::__thread_proxy[abi:un170006]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, llvm::orc::DynamicThreadPoolTaskDispatcher::dispatch(std::__1::unique_ptr<llvm::orc::Task, std::__1::default_delete<llvm::orc::Task>>)::$_0>>(void*) (/Users/buildbot/buildbot-root/aarch64-darwin/build/bin/lli+0x100b4dad0)
 #9 0x000000019d906f94 (/usr/lib/system/libsystem_pthread.dylib+0x18044af94)
#10 0x000000019d901d34 (/usr/lib/system/libsystem_pthread.dylib+0x180445d34)
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll

--

********************


rlavaee pushed a commit to rlavaee/llvm-project that referenced this pull request Jul 1, 2025
Make the fixed-vector lowering of ISD::[L]LRINT use the custom-lowering
routine, lowerVectorXRINT, and fix issues in lowerVectorXRINT related to
this new functionality.