Skip to content

Commit e064211

Browse files
committed
[LV] Vectorize select min/max index.
Add support for vectorizing loops that select the index of the minimum or maximum element. The patch implements vectorizing those patterns by combining Min/Max and FindLastIV reductions. It extends matching Min/Max reductions to allow in-loop users that are FindLastIV reductions. It records a flag indicating that the Min/Max reduction is used by another reduction. When creating reduction recipes, we process any reduction that has other reduction users. The reduction using the min/max reduction needs adjusting to compute the correct result: (1) find the last IV for which the condition based on the min/max reduction is true; (2) compare the partial min/max reduction result to its final value; and (3) select the lanes of the partial FindLastIV reductions that correspond to the lanes matching the min/max reduction result.
1 parent c925a3e commit e064211

File tree

11 files changed

+1435
-242
lines changed

11 files changed

+1435
-242
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
798798
// For each block in the loop.
799799
for (BasicBlock *BB : TheLoop->blocks()) {
800800
// Scan the instructions in the block and look for hazards.
801+
PHINode *UnclassifiedPhi = nullptr;
801802
for (Instruction &I : *BB) {
802803
if (auto *Phi = dyn_cast<PHINode>(&I)) {
803804
Type *PhiTy = Phi->getType();
@@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
887888
addInductionPhi(Phi, ID, AllowedExit);
888889
continue;
889890
}
890-
891-
reportVectorizationFailure("Found an unidentified PHI",
892-
"value that could not be identified as "
893-
"reduction is used outside the loop",
894-
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
895-
return false;
891+
UnclassifiedPhi = Phi;
896892
} // end of PHI handling
897893

898894
// We handle calls that:
@@ -1043,6 +1039,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10431039
return false;
10441040
}
10451041
} // next instr.
1042+
if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) {
1043+
auto I = Reductions.find(&P);
1044+
return I != Reductions.end() &&
1045+
RecurrenceDescriptor::isFindLastIVRecurrenceKind(
1046+
I->second.getRecurrenceKind());
1047+
})) {
1048+
reportVectorizationFailure("Found an unidentified PHI",
1049+
"value that could not be identified as "
1050+
"reduction is used outside the loop",
1051+
"NonReductionValueUsedOutsideLoop", ORE,
1052+
TheLoop, UnclassifiedPhi);
1053+
return false;
1054+
}
10461055
}
10471056

10481057
if (!PrimaryInduction) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7295,6 +7295,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
72957295
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
72967296
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
72977297
using namespace llvm::PatternMatch;
7298+
MainResumeValue = cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())
7299+
->getOperand(0)
7300+
->getUnderlyingValue();
72987301
Value *Cmp, *OrigResumeV, *CmpOp;
72997302
bool IsExpectedPattern =
73007303
match(MainResumeValue,
@@ -7306,7 +7309,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
73067309
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
73077310
(void)IsExpectedPattern;
73087311
MainResumeValue = OrigResumeV;
7312+
} else {
7313+
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
7314+
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
73097315
}
7316+
73107317
PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
73117318

73127319
// When fixing reductions in the epilogue loop we should already have
@@ -8172,7 +8179,7 @@ bool VPRecipeBuilder::getScaledReductions(
81728179
Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range,
81738180
SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains) {
81748181

8175-
if (!CM.TheLoop->contains(RdxExitInstr))
8182+
if (!RdxExitInstr || !CM.TheLoop->contains(RdxExitInstr))
81768183
return false;
81778184

81788185
auto *Update = dyn_cast<BinaryOperator>(RdxExitInstr);
@@ -8266,9 +8273,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82668273
return Recipe;
82678274

82688275
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8269-
assert((Legal->isReductionVariable(Phi) ||
8270-
Legal->isFixedOrderRecurrence(Phi)) &&
8271-
"can only widen reductions and fixed-order recurrences here");
82728276
VPValue *StartV = Operands[0];
82738277
if (Legal->isReductionVariable(Phi)) {
82748278
const RecurrenceDescriptor &RdxDesc =
@@ -8282,12 +8286,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82828286
PhiRecipe = new VPReductionPHIRecipe(
82838287
Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi),
82848288
CM.useOrderedReductions(RdxDesc), ScaleFactor);
8285-
} else {
8289+
} else if (Legal->isFixedOrderRecurrence(Phi)) {
82868290
// TODO: Currently fixed-order recurrences are modeled as chains of
82878291
// first-order recurrences. If there are no users of the intermediate
82888292
// recurrences in the chain, the fixed order recurrence should be modeled
82898293
// directly, enabling more efficient codegen.
82908294
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
8295+
} else {
8296+
// Failed to identify phi as reduction or fixed-order recurrence. Keep the
8297+
// original VPWidenPHIRecipe for now, to be legalized later if possible.
8298+
setRecipe(Phi, R);
8299+
return nullptr;
82918300
}
82928301
// Add backedge value.
82938302
PhiRecipe->addOperand(Operands[1]);
@@ -8472,7 +8481,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84728481
// TODO: Extract final value from induction recipe initially, optimize to
84738482
// pre-computed end value together in optimizeInductionExitUsers.
84748483
auto *VectorPhiR =
8475-
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
8484+
cast<VPSingleDefRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
84768485
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
84778486
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
84788487
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -8494,7 +8503,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84948503
// which for FORs is a vector whose last element needs to be extracted. The
84958504
// start value provides the value if the loop is bypassed.
84968505
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8497-
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8506+
auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
84988507
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
84998508
"Cannot handle loops with uncountable early exits");
85008509
if (IsFOR)
@@ -8503,7 +8512,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
85038512
"vector.recur.extract");
85048513
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
85058514
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
8506-
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
8515+
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name);
85078516
ScalarPhiIRI->addOperand(ResumePhiR);
85088517
}
85098518
}
@@ -8805,6 +8814,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88058814
VPRecipeBase *Recipe =
88068815
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
88078816
if (!Recipe) {
8817+
if (isa<VPWidenPHIRecipe>(SingleDef))
8818+
continue;
88088819
SmallVector<VPValue *, 4> Operands(R.operands());
88098820
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
88108821
}
@@ -8870,6 +8881,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88708881
// Adjust the recipes for any inloop reductions.
88718882
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
88728883

8884+
// Try to convert remaining VPWidenPHIRecipes to reduction recipes.
8885+
if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis,
8886+
*Plan))
8887+
return nullptr;
8888+
88738889
// Transform recipes to abstract recipes if it is legal and beneficial and
88748890
// clamp the range for better cost estimation.
88758891
// TODO: Enable following transform when the EVL-version of extended-reduction
@@ -9331,6 +9347,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93319347
PhiR->setOperand(0, StartV);
93329348
}
93339349
}
9350+
93349351
for (VPRecipeBase *R : ToDelete)
93359352
R->eraseFromParent();
93369353

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1832,7 +1832,8 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
18321832
~VPHeaderPHIRecipe() override = default;
18331833

18341834
/// Method to support type inquiry through isa, cast, and dyn_cast.
1835-
static inline bool classof(const VPRecipeBase *B) {
1835+
static inline bool classof(const VPUser *U) {
1836+
auto *B = cast<VPRecipeBase>(U);
18361837
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
18371838
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
18381839
}
@@ -1841,6 +1842,10 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
18411842
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
18421843
B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
18431844
}
1845+
static inline bool classof(const VPSingleDefRecipe *B) {
1846+
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1847+
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1848+
}
18441849

18451850
/// Generate the phi nodes.
18461851
void execute(VPTransformState &State) override = 0;
@@ -1902,7 +1907,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
19021907
return R && classof(R);
19031908
}
19041909

1905-
static inline bool classof(const VPHeaderPHIRecipe *R) {
1910+
static inline bool classof(const VPSingleDefRecipe *R) {
19061911
return classof(static_cast<const VPRecipeBase *>(R));
19071912
}
19081913

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 145 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -589,3 +589,148 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
589589
TopRegion->setName("vector loop");
590590
TopRegion->getEntryBasicBlock()->setName("vector.body");
591591
}
592+
593+
bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) {
594+
using namespace VPlanPatternMatch;
595+
for (auto &PhiR : make_early_inc_range(
596+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
597+
if (!isa<VPWidenPHIRecipe>(&PhiR))
598+
continue;
599+
600+
// Check if PhiR is a min/max reduction that has a user inside the loop
601+
// outside the min/max reduction chain. The other user must be the compare
602+
// of a FindLastIV reduction chain.
603+
auto *MinMaxPhiR = cast<VPWidenPHIRecipe>(&PhiR);
604+
auto *MinMaxOp = dyn_cast_or_null<VPSingleDefRecipe>(
605+
MinMaxPhiR->getOperand(1)->getDefiningRecipe());
606+
if (!MinMaxOp)
607+
return false;
608+
609+
// The incoming value must be a min/max intrinsic.
610+
// TODO: Also handle the select variant.
611+
Intrinsic::ID ID = Intrinsic::not_intrinsic;
612+
if (auto *WideInt = dyn_cast<VPWidenIntrinsicRecipe>(MinMaxOp))
613+
ID = WideInt->getVectorIntrinsicID();
614+
else {
615+
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
616+
if (!RepR || !isa<IntrinsicInst>(RepR->getUnderlyingInstr()))
617+
return false;
618+
ID = cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID();
619+
}
620+
RecurKind RdxKind = RecurKind::None;
621+
switch (ID) {
622+
case Intrinsic::umax:
623+
RdxKind = RecurKind::UMax;
624+
break;
625+
case Intrinsic::umin:
626+
RdxKind = RecurKind::UMin;
627+
break;
628+
case Intrinsic::smax:
629+
RdxKind = RecurKind::SMax;
630+
break;
631+
case Intrinsic::smin:
632+
RdxKind = RecurKind::SMin;
633+
break;
634+
default:
635+
return false;
636+
}
637+
638+
// The min/max intrinsic must use the phi and itself must only be used by
639+
// the phi and a resume-phi in the scalar preheader.
640+
if (MinMaxOp->getOperand(0) != MinMaxPhiR &&
641+
MinMaxOp->getOperand(1) != MinMaxPhiR)
642+
return false;
643+
if (MinMaxPhiR->getNumUsers() != 2 ||
644+
any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) {
645+
auto *Phi = dyn_cast<VPPhi>(U);
646+
return MinMaxPhiR != U &&
647+
(!Phi || Phi->getParent() != Plan.getScalarPreheader());
648+
}))
649+
return false;
650+
651+
// One user of MinMaxPhiR is MinMaxOp; the other user must be a compare
652+
// that's part of a FindLastIV chain.
653+
auto MinMaxUsers = to_vector(MinMaxPhiR->users());
654+
auto *Cmp = dyn_cast<VPRecipeWithIRFlags>(
655+
MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]);
656+
VPValue *CmpOpA;
657+
VPValue *CmpOpB;
658+
if (!Cmp || Cmp->getNumUsers() != 1 ||
659+
!match(Cmp, m_Binary<Instruction::ICmp>(m_VPValue(CmpOpA),
660+
m_VPValue(CmpOpB))))
661+
return false;
662+
663+
// Normalize the predicate so MinMaxPhiR is on the right side.
664+
CmpInst::Predicate Pred = Cmp->getPredicate();
665+
if (CmpOpA == MinMaxPhiR)
666+
Pred = CmpInst::getSwappedPredicate(Pred);
667+
668+
// Determine if the predicate is not strict.
669+
bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred);
670+
// Account for a mismatch between RdxKind and the predicate.
671+
switch (RdxKind) {
672+
case RecurKind::UMin:
673+
case RecurKind::SMin:
674+
IsNonStrictPred |= ICmpInst::isGT(Pred);
675+
break;
676+
case RecurKind::UMax:
677+
case RecurKind::SMax:
678+
IsNonStrictPred |= ICmpInst::isLT(Pred);
679+
break;
680+
default:
681+
llvm_unreachable("unsupported kind");
682+
}
683+
684+
// TODO: Strict predicates need to find the first IV value for which the
685+
// predicate holds, not the last.
686+
if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred)
687+
return false;
688+
689+
// Cmp must be used by the select of a FindLastIV chain.
690+
VPValue *Sel = dyn_cast<VPSingleDefRecipe>(*Cmp->user_begin());
691+
VPValue *IVOp, *FindIV;
692+
if (!Sel ||
693+
!match(Sel,
694+
m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) ||
695+
Sel->getNumUsers() != 2 || !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
696+
return false;
697+
auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
698+
if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
699+
FindIVPhiR->getRecurrenceKind()))
700+
return false;
701+
702+
assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
703+
"cannot handle inloop/ordered reductions yet");
704+
705+
auto NewPhiR = new VPReductionPHIRecipe(
706+
cast<PHINode>(MinMaxPhiR->getUnderlyingInstr()), RdxKind,
707+
*MinMaxPhiR->getOperand(0), false, false, 1);
708+
NewPhiR->insertBefore(MinMaxPhiR);
709+
MinMaxPhiR->replaceAllUsesWith(NewPhiR);
710+
NewPhiR->addOperand(MinMaxPhiR->getOperand(1));
711+
MinMaxPhiR->eraseFromParent();
712+
713+
// The reduction using MinMaxPhiR needs adjusting to compute the correct
714+
// result:
715+
// 1. We need to find the last IV for which the condition based on the
716+
// min/max recurrence is true,
717+
// 2. Compare the partial min/max reduction result to its final value and,
718+
// 3. Select the lanes of the partial FindLastIV reductions which
719+
// correspond to the lanes matching the min/max reduction result.
720+
VPInstruction *FindIVResult = cast<VPInstruction>(
721+
*(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0)));
722+
VPBuilder B(FindIVResult);
723+
VPInstruction *MinMaxResult =
724+
B.createNaryOp(VPInstruction::ComputeReductionResult,
725+
{NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {});
726+
NewPhiR->getBackedgeValue()->replaceUsesWithIf(
727+
MinMaxResult, [](VPUser &U, unsigned) { return isa<VPPhi>(&U); });
728+
auto *FinalMinMaxCmp = B.createICmp(
729+
CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult);
730+
auto *FinalIVSelect =
731+
B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3),
732+
FindIVResult->getOperand(2));
733+
FindIVResult->setOperand(3, FinalIVSelect);
734+
}
735+
return true;
736+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,11 @@ struct VPlanTransforms {
8383
GetIntOrFpInductionDescriptor,
8484
ScalarEvolution &SE, const TargetLibraryInfo &TLI);
8585

86+
/// Try to legalize unclassified phis by converting VPWidenPHIRecipes to
87+
/// min-max reductions used by FindLastIV reductions if possible. Returns
88+
/// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized.
89+
static bool legalizeUnclassifiedPhis(VPlan &Plan);
90+
8691
/// Try to have all users of fixed-order recurrences appear after the recipe
8792
/// defining their previous value, by either sinking users or hoisting recipes
8893
/// defining their previous value (and its operands). Then introduce

0 commit comments

Comments
 (0)