Skip to content

[LV] Vectorize selecting last IV of min/max element. #141431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
@@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// For each block in the loop.
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for hazards.
PHINode *UnclassifiedPhi = nullptr;
for (Instruction &I : *BB) {
if (auto *Phi = dyn_cast<PHINode>(&I)) {
Type *PhiTy = Phi->getType();
@@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
addInductionPhi(Phi, ID, AllowedExit);
continue;
}

reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
return false;
UnclassifiedPhi = Phi;
} // end of PHI handling

// We handle calls that:
@@ -1043,6 +1039,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}
} // next instr.
if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) {
auto I = Reductions.find(&P);
return I != Reductions.end() &&
RecurrenceDescriptor::isFindLastIVRecurrenceKind(
I->second.getRecurrenceKind());
})) {
reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
"NonReductionValueUsedOutsideLoop", ORE,
TheLoop, UnclassifiedPhi);
return false;
}
}

if (!PrimaryInduction) {
30 changes: 23 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
@@ -7204,6 +7204,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
using namespace llvm::PatternMatch;
MainResumeValue = cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())
->getOperand(0)
->getUnderlyingValue();
Value *Cmp, *OrigResumeV, *CmpOp;
[[maybe_unused]] bool IsExpectedPattern =
match(MainResumeValue,
@@ -7214,7 +7217,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp))));
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
MainResumeValue = OrigResumeV;
} else if (auto *VPI =
dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
}

PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);

// When fixing reductions in the epilogue loop we should already have
@@ -8227,9 +8234,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
Legal->isFixedOrderRecurrence(Phi)) &&
"can only widen reductions and fixed-order recurrences here");
VPValue *StartV = Operands[0];
if (Legal->isReductionVariable(Phi)) {
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
@@ -8242,12 +8246,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
} else {
} else if (Legal->isFixedOrderRecurrence(Phi)) {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
// recurrences in the chain, the fixed order recurrence should be modeled
// directly, enabling more efficient codegen.
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
} else {
// Failed to identify phi as reduction or fixed-order recurrence. Keep the
// original VPWidenPHIRecipe for now, to be legalized later if possible.
setRecipe(Phi, R);
return nullptr;
}
// Add backedge value.
PhiRecipe->addOperand(Operands[1]);
@@ -8432,7 +8441,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
// TODO: Extract final value from induction recipe initially, optimize to
// pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR =
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
cast<VPSingleDefRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -8454,7 +8463,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
// which for FORs is a vector whose last element needs to be extracted. The
// start value provides the value if the loop is bypassed.
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
"Cannot handle loops with uncountable early exits");
if (IsFOR)
@@ -8463,7 +8472,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name);
ScalarPhiIRI->addOperand(ResumePhiR);
}
}
@@ -8734,6 +8743,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPRecipeBase *Recipe =
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
if (!Recipe) {
if (isa<VPWidenPHIRecipe>(SingleDef))
continue;
SmallVector<VPValue *, 4> Operands(R.operands());
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
}
@@ -8796,6 +8807,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Try to convert remaining VPWidenPHIRecipes to reduction recipes.
if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis,
*Plan))
return nullptr;
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -9268,6 +9283,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
PhiR->setOperand(0, StartV);
}
}

for (VPRecipeBase *R : ToDelete)
R->eraseFromParent();

9 changes: 7 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
@@ -1886,7 +1886,8 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
~VPHeaderPHIRecipe() override = default;

/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
static inline bool classof(const VPUser *U) {
auto *B = cast<VPRecipeBase>(U);
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
}
@@ -1895,6 +1896,10 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
}
/// Type-inquiry support (isa/cast/dyn_cast) starting from a VPSingleDefRecipe:
/// \p B is a header phi recipe iff its VPDef ID lies within the inclusive
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC] range.
static inline bool classof(const VPSingleDefRecipe *B) {
  const auto DefID = B->getVPDefID();
  const bool AtOrAfterFirst = DefID >= VPDef::VPFirstHeaderPHISC;
  const bool AtOrBeforeLast = DefID <= VPDef::VPLastHeaderPHISC;
  return AtOrAfterFirst && AtOrBeforeLast;
}

/// Generate the phi nodes.
void execute(VPTransformState &State) override = 0;
@@ -1956,7 +1961,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
return R && classof(R);
}

static inline bool classof(const VPHeaderPHIRecipe *R) {
static inline bool classof(const VPSingleDefRecipe *R) {
return classof(static_cast<const VPRecipeBase *>(R));
}

145 changes: 145 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
@@ -813,3 +813,148 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
MiddleTerm->setOperand(0, NewCond);
return true;
}

/// Try to convert every VPWidenPHIRecipe in the header of the vector loop
/// region of \p Plan into a min/max VPReductionPHIRecipe whose only other
/// in-loop user is the compare feeding the select of a FindLastIV reduction.
///
/// For each converted phi, the FindLastIV result computation is adjusted so
/// that only lanes whose partial min/max equals the final min/max contribute.
///
/// \returns true if all VPWidenPHIRecipes in the header were converted (or
/// there were none), false as soon as one of them does not match the
/// supported pattern. All pattern checks are performed before \p Plan is
/// modified for the phi being processed.
bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) {
  using namespace VPlanPatternMatch;
  // Phis are erased while iterating, hence the early-increment range.
  for (auto &PhiR : make_early_inc_range(
           Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
    if (!isa<VPWidenPHIRecipe>(&PhiR))
      continue;

    // Check if PhiR is a min/max reduction that has a user inside the loop
    // outside the min/max reduction chain. The other user must be the compare
    // of a FindLastIV reduction chain.
    auto *MinMaxPhiR = cast<VPWidenPHIRecipe>(&PhiR);
    auto *MinMaxOp = dyn_cast_or_null<VPSingleDefRecipe>(
        MinMaxPhiR->getOperand(1)->getDefiningRecipe());
    if (!MinMaxOp)
      return false;

    // The incoming value must be a min/max intrinsic.
    // TODO: Also handle the select variant.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (auto *WideInt = dyn_cast<VPWidenIntrinsicRecipe>(MinMaxOp)) {
      ID = WideInt->getVectorIntrinsicID();
    } else {
      auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
      if (!RepR || !isa<IntrinsicInst>(RepR->getUnderlyingInstr()))
        return false;
      ID = cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID();
    }
    RecurKind RdxKind = RecurKind::None;
    switch (ID) {
    case Intrinsic::umax:
      RdxKind = RecurKind::UMax;
      break;
    case Intrinsic::umin:
      RdxKind = RecurKind::UMin;
      break;
    case Intrinsic::smax:
      RdxKind = RecurKind::SMax;
      break;
    case Intrinsic::smin:
      RdxKind = RecurKind::SMin;
      break;
    default:
      return false;
    }

    // The min/max intrinsic must use the phi and itself must only be used by
    // the phi and a resume-phi in the scalar preheader.
    if (MinMaxOp->getOperand(0) != MinMaxPhiR &&
        MinMaxOp->getOperand(1) != MinMaxPhiR)
      return false;
    if (MinMaxPhiR->getNumUsers() != 2 ||
        any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) {
          auto *Phi = dyn_cast<VPPhi>(U);
          return MinMaxPhiR != U &&
                 (!Phi || Phi->getParent() != Plan.getScalarPreheader());
        }))
      return false;

    // One user of MinMaxPhiR is MinMaxOp, the other users must be a compare
    // that's part of a FindLastIV chain.
    auto MinMaxUsers = to_vector(MinMaxPhiR->users());
    auto *Cmp = dyn_cast<VPRecipeWithIRFlags>(
        MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]);
    VPValue *CmpOpA;
    VPValue *CmpOpB;
    if (!Cmp || Cmp->getNumUsers() != 1 ||
        !match(Cmp, m_Binary<Instruction::ICmp>(m_VPValue(CmpOpA),
                                                m_VPValue(CmpOpB))))
      return false;

    // Normalize the predicate so MinMaxPhiR is on the right side.
    CmpInst::Predicate Pred = Cmp->getPredicate();
    if (CmpOpA == MinMaxPhiR)
      Pred = CmpInst::getSwappedPredicate(Pred);

    // Determine if the predicate is not strict.
    bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred);
    // Account for a mis-match between RdxKind and the predicate.
    switch (RdxKind) {
    case RecurKind::UMin:
    case RecurKind::SMin:
      IsNonStrictPred |= ICmpInst::isGT(Pred);
      break;
    case RecurKind::UMax:
    case RecurKind::SMax:
      IsNonStrictPred |= ICmpInst::isLT(Pred);
      break;
    default:
      llvm_unreachable("unsupported kind");
    }

    // TODO: Strict predicates need to find the first IV value for which the
    // predicate holds, not the last.
    if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred)
      return false;

    // Cmp must be used by the select of a FindLastIV chain.
    VPValue *Sel = dyn_cast<VPSingleDefRecipe>(*Cmp->user_begin());
    VPValue *IVOp, *FindIV;
    if (!Sel ||
        !match(Sel,
               m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) ||
        Sel->getNumUsers() != 2 || !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
      return false;
    auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
    if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
                           FindIVPhiR->getRecurrenceKind()))
      return false;

    assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
           "cannot handle inloop/ordered reductions yet");

    // Sel has exactly two users; the one that is not FindIVPhiR is expected to
    // be the VPInstruction computing the final FindLastIV result. Nothing above
    // guarantees that, so validate it (and that it has the operands accessed
    // below) *before* modifying the plan, and bail out like any other pattern
    // mismatch instead of asserting in cast<>.
    auto *FindIVResult = dyn_cast<VPInstruction>(
        *(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0)));
    if (!FindIVResult || FindIVResult->getNumOperands() < 4)
      return false;

    // Replace the widened phi with a proper min/max reduction phi. Users are
    // redirected first; the backedge operand is then taken over from the old
    // phi before it is erased.
    auto *NewPhiR = new VPReductionPHIRecipe(
        cast<PHINode>(MinMaxPhiR->getUnderlyingInstr()), RdxKind,
        *MinMaxPhiR->getOperand(0), false, false, 1);
    NewPhiR->insertBefore(MinMaxPhiR);
    MinMaxPhiR->replaceAllUsesWith(NewPhiR);
    NewPhiR->addOperand(MinMaxPhiR->getOperand(1));
    MinMaxPhiR->eraseFromParent();

    // The reduction using MinMaxPhiR needs adjusting to compute the correct
    // result:
    //  1. We need to find the last IV for which the condition based on the
    //     min/max recurrence is true,
    //  2. Compare the partial min/max reduction result to its final value and,
    //  3. Select the lanes of the partial FindLastIV reductions which
    //     correspond to the lanes matching the min/max reduction result.
    VPBuilder B(FindIVResult);
    VPInstruction *MinMaxResult =
        B.createNaryOp(VPInstruction::ComputeReductionResult,
                       {NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {});
    // Phi users of the backedge value (the scalar-preheader resume phi checked
    // above) must now resume from the final reduced min/max value.
    NewPhiR->getBackedgeValue()->replaceUsesWithIf(
        MinMaxResult, [](VPUser &U, unsigned) { return isa<VPPhi>(&U); });
    auto *FinalMinMaxCmp = B.createICmp(
        CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult);
    // NOTE(review): presumably operand 3 of FindIVResult is the partial
    // FindLastIV value and operand 2 the sentinel used for non-matching lanes
    // -- confirm against the operand layout of the FindIV result instruction.
    auto *FinalIVSelect =
        B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3),
                       FindIVResult->getOperand(2));
    FindIVResult->setOperand(3, FinalIVSelect);
  }
  return true;
}
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
@@ -93,6 +93,11 @@ struct VPlanTransforms {
GetIntOrFpInductionDescriptor,
ScalarEvolution &SE, const TargetLibraryInfo &TLI);

/// Try to legalize unclassified phis by converting VPWidenPHIRecipes to
/// min-max reductions used by FindLastIV reductions if possible. Returns
/// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized.
static bool legalizeUnclassifiedPhis(VPlan &Plan);

/// Try to have all users of fixed-order recurrences appear after the recipe
/// defining their previous value, by either sinking users or hoisting recipes
/// defining their previous value (and its operands). Then introduce
Loading
Oops, something went wrong.
Loading
Oops, something went wrong.