[RISCV] Lower BUILD_VECTOR with i64 type to VID on RV32 if possible #132339
base: main

Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Jim Lin (tclin914)

Changes

The element type i64 of a BUILD_VECTOR is not legal on RV32, so the node is type-legalized before lowering and the VID pattern is no longer matched afterwards. Try to custom-lower such BUILD_VECTORs to VID during type legalization instead, when possible (a standalone sketch of the lost pattern follows the diff below).

Full diff: https://github.com/llvm/llvm-project/pull/132339.diff

5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 132faf5b85c1a..ff0ffa16f9aa6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1281,6 +1281,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+
+ // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
+ setOperationAction(ISD::BUILD_VECTOR, MVT::i64, Custom);
}
setOperationAction(
@@ -3601,6 +3604,78 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
return Gather;
}
+static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+ SDLoc DL(Op);
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
+ int64_t StepNumerator = SimpleVID->StepNumerator;
+ unsigned StepDenominator = SimpleVID->StepDenominator;
+ int64_t Addend = SimpleVID->Addend;
+
+ assert(StepNumerator != 0 && "Invalid step");
+ bool Negate = false;
+ int64_t SplatStepVal = StepNumerator;
+ unsigned StepOpcode = ISD::MUL;
+ // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
+ // anyway as the shift of 63 won't fit in uimm5.
+ if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
+ isPowerOf2_64(std::abs(StepNumerator))) {
+ Negate = StepNumerator < 0;
+ StepOpcode = ISD::SHL;
+ SplatStepVal = Log2_64(std::abs(StepNumerator));
+ }
+
+ // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
+ // threshold since it's the immediate value many RVV instructions accept.
+ // There is no vmul.vi instruction so ensure the multiply constant can
+ // fit in a single addi instruction.
+ if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
+ (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
+ isPowerOf2_32(StepDenominator) &&
+ (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
+ MVT VIDVT =
+ VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+ MVT VIDContainerVT =
+ getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
+ SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
+ // Convert right out of the scalable type so we can use standard ISD
+ // nodes for the rest of the computation. If we used scalable types with
+ // these, we'd lose the fixed-length vector info and generate worse
+ // vsetvli code.
+ VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
+ if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
+ (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
+ SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
+ VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
+ }
+ if (StepDenominator != 1) {
+ SDValue SplatStep =
+ DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
+ VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
+ }
+ if (Addend != 0 || Negate) {
+ SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
+ VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
+ VID);
+ }
+ if (VT.isFloatingPoint()) {
+ // TODO: Use vfwcvt to reduce register pressure.
+ VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
+ }
+ return VID;
+ }
+ }
+
+ return SDValue();
+}
+
/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
/// which constitute a large proportion of the elements. In such cases we can
/// splat a vector with the dominant element and make up the shortfall with
@@ -3818,64 +3893,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
// Try and match index sequences, which we can lower to the vid instruction
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
- if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
- int64_t StepNumerator = SimpleVID->StepNumerator;
- unsigned StepDenominator = SimpleVID->StepDenominator;
- int64_t Addend = SimpleVID->Addend;
-
- assert(StepNumerator != 0 && "Invalid step");
- bool Negate = false;
- int64_t SplatStepVal = StepNumerator;
- unsigned StepOpcode = ISD::MUL;
- // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
- // anyway as the shift of 63 won't fit in uimm5.
- if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
- isPowerOf2_64(std::abs(StepNumerator))) {
- Negate = StepNumerator < 0;
- StepOpcode = ISD::SHL;
- SplatStepVal = Log2_64(std::abs(StepNumerator));
- }
-
- // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
- // threshold since it's the immediate value many RVV instructions accept.
- // There is no vmul.vi instruction so ensure multiply constant can fit in
- // a single addi instruction.
- if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
- (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
- isPowerOf2_32(StepDenominator) &&
- (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
- MVT VIDVT =
- VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
- MVT VIDContainerVT =
- getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
- SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
- // Convert right out of the scalable type so we can use standard ISD
- // nodes for the rest of the computation. If we used scalable types with
- // these, we'd lose the fixed-length vector info and generate worse
- // vsetvli code.
- VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
- if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
- (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
- SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
- VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
- }
- if (StepDenominator != 1) {
- SDValue SplatStep =
- DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
- VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
- }
- if (Addend != 0 || Negate) {
- SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
- VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
- VID);
- }
- if (VT.isFloatingPoint()) {
- // TODO: Use vfwcvt to reduce register pressure.
- VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
- }
- return VID;
- }
- }
+ if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
+ return Res;
// For very small build_vectors, use a single scalar insert of a constant.
// TODO: Base this on constant rematerialization cost, not size.
@@ -7473,8 +7492,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVECTOR_REVERSE(Op, DAG);
case ISD::VECTOR_SPLICE:
return lowerVECTOR_SPLICE(Op, DAG);
- case ISD::BUILD_VECTOR:
+ case ISD::BUILD_VECTOR: {
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ if (!Subtarget.is64Bit() && EltVT == MVT::i64)
+ return lowerBuildVectorViaVID(Op, DAG, Subtarget);
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
+ }
case ISD::SPLAT_VECTOR: {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
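As a reading aid before the test changes, here is a minimal scalar model of the sequence the new lowerBuildVectorViaVID helper above matches and materializes. The field meanings (StepNumerator, StepDenominator, Addend) follow isSimpleVIDSequence, but the helpers below (log2u, isPow2, lowerViaVID) are illustrative stand-ins, not LLVM APIs.

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <vector>

// Log2 of a power of two (stand-in for llvm::Log2_64).
static unsigned log2u(uint64_t X) {
  unsigned L = 0;
  while (X >>= 1)
    ++L;
  return L;
}

static bool isPow2(uint64_t X) { return X != 0 && (X & (X - 1)) == 0; }

// Scalar model of the lowering: lane i of a matched build_vector equals
// (i * StepNum) / StepDen + Addend, and the emitted RVV sequence is vid.v
// followed by a shift or multiply, an optional srl, and an add or rsub.
std::vector<int64_t> lowerViaVID(unsigned NumElts, int64_t StepNum,
                                 unsigned StepDen, int64_t Addend) {
  assert(StepNum != 0 && "Invalid step");
  // Strength-reduce a power-of-two step to a shift; INT64_MIN is excluded
  // because llabs would overflow and a shift of 63 won't fit in uimm5.
  bool Negate = false, UseShift = false;
  int64_t Step = StepNum;
  if (StepNum != 1 && StepNum != INT64_MIN && isPow2(std::llabs(StepNum))) {
    Negate = StepNum < 0;
    UseShift = true;
    Step = log2u(std::llabs(StepNum));
  }
  std::vector<int64_t> V(NumElts);
  for (unsigned I = 0; I < NumElts; ++I) {
    int64_t E = I;                                // vid.v
    E = UseShift ? (E << Step) : (E * Step);      // vsll.vi / vmul.vx
    E = (int64_t)((uint64_t)E >> log2u(StepDen)); // vsrl.vi
    V[I] = Negate ? Addend - E : Addend + E;      // vrsub.vi / vadd.vi
  }
  return V;
}

For example, lowerViaVID(4, 1, 1, 0) returns {0, 1, 2, 3} (a bare vid.v), and lowerViaVID(4, 2, 1, 0) returns {0, 2, 4, 6}, which the backend emits as vid.v followed by vadd.vv v8, v8, v8 in the buildvec_vid_step2_add0_v4i64 test below.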
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index a91263e85e9e8..41f2e9afbb07d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -290,15 +290,11 @@ define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3)
ret void
}
-; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization.
define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI25_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI25_0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle8.v v10, (a0)
-; RV32-NEXT: vsext.vf4 v8, v10
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vid.v v8
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
@@ -323,11 +319,9 @@ define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI26_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI26_0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle8.v v10, (a0)
-; RV32-NEXT: vsext.vf4 v8, v10
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vid.v v8
+; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 3bb5e179e0d06..a9adc87d29c8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1193,15 +1193,11 @@ define void @mulhu_v2i64(ptr %x) {
; RV32-NEXT: addi a1, a1, %lo(.LCPI69_0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v9, (a1)
-; RV32-NEXT: lui a1, 32
-; RV32-NEXT: addi a1, a1, 1
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmulhu.vv v8, v8, v9
-; RV32-NEXT: vmv.s.x v9, a1
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vsext.vf4 v10, v9
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vv v8, v8, v10
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vadd.vi v9, v9, 1
+; RV32-NEXT: vsrl.vv v8, v8, v9
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
@@ -1348,27 +1344,21 @@ define void @mulhs_v2i64(ptr %x) {
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vid.v v9
; RV32-NEXT: addi a2, a1, 1365
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: li a2, 63
; RV32-NEXT: addi a1, a1, 1366
-; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v9, v9, 1
-; RV32-NEXT: vrsub.vi v9, v9, 0
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vrsub.vi v11, v9, 0
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32-NEXT: vmulh.vv v10, v8, v10
-; RV32-NEXT: vmadd.vv v9, v8, v10
-; RV32-NEXT: vmv.s.x v8, a1
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vsext.vf4 v10, v8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a2
-; RV32-NEXT: vsra.vv v9, v9, v10
+; RV32-NEXT: vmadd.vv v11, v8, v10
+; RV32-NEXT: vsrl.vx v8, v11, a2
+; RV32-NEXT: vsra.vv v9, v11, v9
; RV32-NEXT: vadd.vv v8, v9, v8
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
index dcd16e093ea7e..9812e9832856d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
declare <2 x i8> @llvm.stepvector.v2i8()
@@ -161,19 +161,11 @@ define <16 x i32> @stepvector_v16i32() {
declare <2 x i64> @llvm.stepvector.v2i64()
define <2 x i64> @stepvector_v2i64() {
-; RV32-LABEL: stepvector_v2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsext.vf4 v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <2 x i64> @llvm.stepvector.v2i64()
ret <2 x i64> %v
}
@@ -181,20 +173,11 @@ define <2 x i64> @stepvector_v2i64() {
declare <4 x i64> @llvm.stepvector.v4i64()
define <4 x i64> @stepvector_v4i64() {
-; RV32-LABEL: stepvector_v4i64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI14_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI14_0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle8.v v10, (a0)
-; RV32-NEXT: vsext.vf4 v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <4 x i64> @llvm.stepvector.v4i64()
ret <4 x i64> %v
}
@@ -202,20 +185,11 @@ define <4 x i64> @stepvector_v4i64() {
declare <8 x i64> @llvm.stepvector.v8i64()
define <8 x i64> @stepvector_v8i64() {
-; RV32-LABEL: stepvector_v8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI15_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0)
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle8.v v12, (a0)
-; RV32-NEXT: vsext.vf4 v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <8 x i64> @llvm.stepvector.v8i64()
ret <8 x i64> %v
}
@@ -223,21 +197,11 @@ define <8 x i64> @stepvector_v8i64() {
declare <16 x i64> @llvm.stepvector.v16i64()
define <16 x i64> @stepvector_v16i64() {
-; RV32-LABEL: stepvector_v16i64:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: lui a1, %hi(.LCPI16_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI16_0)
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vle8.v v16, (a1)
-; RV32-NEXT: vsext.vf4 v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: stepvector_v16i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; CHECK-LABEL: stepvector_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: ret
%v = call <16 x i64> @llvm.stepvector.v16i64()
ret <16 x i64> %v
}
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index daeb306b7e85f..c486692a90ff1 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -653,43 +653,33 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: mv a0, s1
; RV32MV-NEXT: mv a1, s3
; RV32MV-NEXT: call __moddi3
-; RV32MV-NEXT: addi a2, sp, 16
-; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32MV-NEXT: vlse64.v v8, (a2), zero
; RV32MV-NEXT: addi a2, sp, 32
-; RV32MV-NEXT: vl2r.v v10, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: vslide1down.vx v10, v10, a0
-; RV32MV-NEXT: vslide1down.vx v10, v10, a1
-; RV32MV-NEXT: vslidedown.vi v10, v10, 2
+; RV32MV-NEXT: vslide1down.vx v8, v8, a0
+; RV32MV-NEXT: addi a0, sp, 16
; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32MV-NEXT: vand.vv v8, v10, v8
-; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.i v10, 1
-; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.i v12, 0
-; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
-; RV32MV-NEXT: vslideup.vi v12, v10, 2
-; RV32MV-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.i v10, 2
-; RV32MV-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
-; RV32MV-NEXT: vslideup.vi v12, v10, 4
+; RV32MV-NEXT: vlse64.v v10, (a0), zero
+; RV32MV-NEXT: vid.v v12
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: vsext.vf4 v10, v12
+; RV32MV-NEXT: vslide1down.vx v8, v8, a1
+; RV32MV-NEXT: vslidedown.vi v8, v8, 2
; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32MV-NEXT: vand.vv v8, v8, v10
+; RV32MV-NEXT: vand.vv v10, v12, v10
; RV32MV-NEXT: vmsne.vv v0, v8, v10
; RV32MV-NEXT: vmv.v.i v8, 0
; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32MV-NEXT: vslidedown.vi v10, v8, 1
-; RV32MV-NEXT: vslidedown.vi v11, v8, 2
-; RV32MV-NEXT: vmv.x.s a0, v10
-; RV32MV-NEXT: vmv.x.s a1, v11
+; RV32MV-NEXT: vslidedown.vi v12, v8, 1
+; RV32MV-NEXT: vslidedown.vi v13, v8, 2
; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32MV-NEXT: vslidedown.vi v10, v8, 4
+; RV32MV-NEXT: vmv.x.s a0, v12
+; RV32MV-NEXT: vmv.x.s a1, v13
+; RV32MV-NEXT: vslidedown.vi v12, v8, 5
; RV32MV-NEXT: vmv.x.s a2, v10
-; RV32MV-NEXT: vslidedown.vi v10, v8, 5
-; RV32MV-NEXT: vmv.x.s a3, v10
+; RV32MV-NEXT: vmv.x.s a3, v12
; RV32MV-NEXT: slli a4, a1, 1
; RV32MV-NEXT: sub a4, a4, a0
; RV32MV-NEXT: srli a0, a2, 30
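To see concretely what the description means by the pattern not being caught after legalization, here is a standalone sketch (illustrative C++, not LLVM's legalizer code): splitting each i64 lane into little-endian i32 halves, as RV32 type legalization effectively does, destroys the constant-step structure the VID matcher looks for.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int64_t> V64 = {0, 1, 2, 3}; // matches vid.v as-is
  std::vector<uint32_t> V32;               // lanes of the legalized vector
  for (int64_t E : V64) {
    V32.push_back(static_cast<uint32_t>(E));       // low 32 bits
    V32.push_back(static_cast<uint32_t>(E >> 32)); // high 32 bits
  }
  for (uint32_t W : V32)
    std::printf("%u ", static_cast<unsigned>(W)); // 0 0 1 0 2 0 3 0
  std::printf("\n");
  return 0;
}

The i32 sequence {0, 0, 1, 0, 2, 0, 3, 0} has no single constant step, which is why the pre-patch RV32 output falls back to a constant-pool load plus vsext.vf4.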
This is "during" type legalization, not "before". |
Thanks. |
@@ -1281,6 +1281,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+
+      // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
+      setOperationAction(ISD::BUILD_VECTOR, MVT::i64, Custom);
Shouldn't this be VT, not MVT::i64? (i.e. the vector type, not the scalar element type)
MVT::i64 is correct so that the type legalizer calls it when the scalar type is illegal. Calling it on the vector type would mean it only gets called after the type legalizer since the vector type is legal.
The element type i64 of the BUILD_VECTOR is not legal on RV32, and the VID pattern is no longer caught once the node has been type-legalized.
So try to custom-lower it to VID during type legalization.
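A toy model of the distinction settled in this thread (an assumption-laden sketch: the real mechanism lives in TargetLowering's action tables and the DAG type legalizer, and the string keys here are hypothetical): keying the Custom action on the scalar MVT::i64 makes it visible while the element type is still illegal, whereas keying on the vector type would only be consulted once all types are already legal.

#include <cstdio>
#include <map>
#include <string>

enum class Action { Legal, Custom };

int main() {
  // Stand-in for setOperationAction registrations (hypothetical keys).
  std::map<std::string, Action> BuildVectorAction = {
      {"i64", Action::Custom},  // what this patch registers (scalar type)
      {"v4i64", Action::Legal}, // nothing custom keyed on the vector type
  };
  bool I64ScalarLegal = false; // RV32: i64 must be type-legalized
  if (!I64ScalarLegal && BuildVectorAction["i64"] == Action::Custom)
    std::puts("type legalizer consults the i64 entry: vid.v can still form");
  else if (BuildVectorAction["v4i64"] == Action::Custom)
    std::puts("only reached after type legalization, when the pattern is gone");
  return 0;
}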