@@ -902,6 +902,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
902
902
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
903
903
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
904
904
setTargetDAGCombine(ISD::VECREDUCE_ADD);
905
+ setTargetDAGCombine(ISD::STEP_VECTOR);
905
906
906
907
setTargetDAGCombine(ISD::GlobalAddress);
907
908
@@ -1151,7 +1152,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1151
1152
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1152
1153
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1153
1154
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1154
- setOperationAction(ISD::STEP_VECTOR, VT, Custom);
1155
1155
1156
1156
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1157
1157
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
@@ -4476,8 +4476,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4476
4476
return LowerVECTOR_SHUFFLE(Op, DAG);
4477
4477
case ISD::SPLAT_VECTOR:
4478
4478
return LowerSPLAT_VECTOR(Op, DAG);
4479
- case ISD::STEP_VECTOR:
4480
- return LowerSTEP_VECTOR(Op, DAG);
4481
4479
case ISD::EXTRACT_SUBVECTOR:
4482
4480
return LowerEXTRACT_SUBVECTOR(Op, DAG);
4483
4481
case ISD::INSERT_SUBVECTOR:
@@ -9162,20 +9160,6 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9162
9160
return GenerateTBL(Op, ShuffleMask, DAG);
9163
9161
}
9164
9162
9165
- SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op,
9166
- SelectionDAG &DAG) const {
9167
- SDLoc dl(Op);
9168
- EVT VT = Op.getValueType();
9169
- assert(VT.isScalableVector() &&
9170
- "Only expect scalable vectors for STEP_VECTOR");
9171
- assert(VT.getScalarType() != MVT::i1 &&
9172
- "Vectors of i1 types not supported for STEP_VECTOR");
9173
-
9174
- SDValue StepVal = Op.getOperand(0);
9175
- SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType());
9176
- return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal);
9177
- }
9178
-
9179
9163
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
9180
9164
SelectionDAG &DAG) const {
9181
9165
SDLoc dl(Op);
@@ -9261,9 +9245,7 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
9261
9245
SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
9262
9246
9263
9247
// create the vector 0,1,0,1,...
9264
- SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
9265
- SDValue SV = DAG.getNode(AArch64ISD::INDEX_VECTOR,
9266
- DL, MVT::nxv2i64, Zero, One);
9248
+ SDValue SV = DAG.getNode(ISD::STEP_VECTOR, DL, MVT::nxv2i64, One);
9267
9249
SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
9268
9250
9269
9251
// create the vector idx64,idx64+1,idx64,idx64+1,...
@@ -13665,15 +13647,18 @@ static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
13665
13647
SDLoc DL(N);
13666
13648
SDValue Op1 = N->getOperand(1);
13667
13649
SDValue Op2 = N->getOperand(2);
13668
- EVT ScalarTy = Op1.getValueType();
13669
-
13670
- if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) {
13671
- Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
13672
- Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
13673
- }
13650
+ EVT ScalarTy = Op2.getValueType();
13651
+ if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
13652
+ ScalarTy = MVT::i32;
13674
13653
13675
- return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0),
13676
- Op1, Op2);
13654
+ // Lower index_vector(base, step) to mul(step, step_vector(1)) + splat(base).
13655
+ SDValue One = DAG.getConstant(1, DL, ScalarTy);
13656
+ SDValue StepVector =
13657
+ DAG.getNode(ISD::STEP_VECTOR, DL, N->getValueType(0), One);
13658
+ SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
13659
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
13660
+ SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
13661
+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
13677
13662
}
13678
13663
13679
13664
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
@@ -15463,6 +15448,19 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
15463
15448
DAG.getConstant(MinOffset, DL, MVT::i64));
15464
15449
}
15465
15450
15451
// Target DAG combine for ISD::STEP_VECTOR (dispatched from PerformDAGCombine).
// Rewrites the node into AArch64ISD::INDEX_VECTOR whose first operand is a
// zero constant and whose second operand is the node's original step operand.
//
// N   - the STEP_VECTOR node being combined; operand 0 is used as the step.
// DCI - combiner state; only queried for the current legalization phase.
// DAG - used to create the zero constant and the replacement node.
//
// Returns an empty SDValue (no replacement) unless DCI reports that DAG
// legalization has completed; otherwise returns the new INDEX_VECTOR node
// with the same result type as N.
+ static SDValue performStepVectorCombine(SDNode *N,
15452
+ TargetLowering::DAGCombinerInfo &DCI,
15453
+ SelectionDAG &DAG) {
15454
// Leave the generic node untouched until DAG legalization has finished.
// NOTE(review): presumably this keeps STEP_VECTOR visible to earlier
// combines/patterns (e.g. the mul/add form built for the SVE index
// intrinsic) - confirm against the intended combine ordering.
+ if (!DCI.isAfterLegalizeDAG())
15455
+ return SDValue();
15456
+
15457
+ SDLoc DL(N);
15458
+ EVT VT = N->getValueType(0);
15459
+ SDValue StepVal = N->getOperand(0);
15460
// Zero constant of the same type as the step value; passed as the first
// INDEX_VECTOR operand (the base, going by the index_vector(base, step)
// convention used elsewhere in this file).
+ SDValue Zero = DAG.getConstant(0, DL, StepVal.getValueType());
15461
+ return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, VT, Zero, StepVal);
15462
+ }
15463
+
15466
15464
// Turns the vector of indices into a vector of byte offsets by scaling Offset
15467
15465
// by (BitWidth / 8).
15468
15466
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
@@ -15977,6 +15975,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
15977
15975
return performExtractVectorEltCombine(N, DAG);
15978
15976
case ISD::VECREDUCE_ADD:
15979
15977
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
15978
+ case ISD::STEP_VECTOR:
15979
+ return performStepVectorCombine(N, DCI, DAG);
15980
15980
case ISD::INTRINSIC_VOID:
15981
15981
case ISD::INTRINSIC_W_CHAIN:
15982
15982
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
0 commit comments