Skip to content

Commit 6db4524

Browse files
[NFC][LLVM][LoopVectorize] Change getSmallBestKnownTC to return an ElementCount.
This is prep work for enabling better UF calculations when using vscale-based VFs to vectorise loops with vscale-based trip counts. NOTE: This is NFC because all uses remain fixed-length until a following PR changes getSmallConstantRuntimeTripCount().
1 parent 9aebf4c commit 6db4524

File tree

3 files changed

+31
-20
lines changed

3 files changed

+31
-20
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,10 @@ class ScalarEvolution {
823823
/// than the backedge taken count for the loop.
824824
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L);
825825

826+
/// A version of getSmallConstantTripCount that returns an ElementCount to
827+
/// include loops whose trip count is a function of llvm.vscale().
828+
ElementCount getSmallConstantRuntimeTripCount(const Loop *L);
829+
826830
/// Return the exact trip count for this loop if we exit through ExitingBlock.
827831
/// '0' is used to represent an unknown or non-constant trip count. Note
828832
/// that a trip count is simply one more than the backedge taken count for

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8217,6 +8217,10 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
82178217
return getConstantTripCount(ExitCount);
82188218
}
82198219

8220+
ElementCount ScalarEvolution::getSmallConstantRuntimeTripCount(const Loop *L) {
8221+
return ElementCount::getFixed(getSmallConstantTripCount(L));
8222+
}
8223+
82208224
unsigned
82218225
ScalarEvolution::getSmallConstantTripCount(const Loop *L,
82228226
const BasicBlock *ExitingBlock) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -427,24 +427,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
427427
/// 2) Returns expected trip count according to profile data if any.
428428
/// 3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
429429
/// 4) Returns std::nullopt if all of the above failed.
430-
static std::optional<unsigned>
430+
static std::optional<ElementCount>
431431
getSmallBestKnownTC(PredicatedScalarEvolution &PSE, Loop *L,
432432
bool CanUseConstantMax = true) {
433433
// Check if exact trip count is known.
434-
if (unsigned ExpectedTC = PSE.getSE()->getSmallConstantTripCount(L))
434+
if (auto ExpectedTC = PSE.getSE()->getSmallConstantRuntimeTripCount(L))
435435
return ExpectedTC;
436436

437437
// Check if there is an expected trip count available from profile data.
438438
if (LoopVectorizeWithBlockFrequency)
439439
if (auto EstimatedTC = getLoopEstimatedTripCount(L))
440-
return *EstimatedTC;
440+
return ElementCount::getFixed(*EstimatedTC);
441441

442442
if (!CanUseConstantMax)
443443
return std::nullopt;
444444

445445
// Check if upper bound estimate is known.
446446
if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount())
447-
return ExpectedTC;
447+
return ElementCount::getFixed(ExpectedTC);
448448

449449
return std::nullopt;
450450
}
@@ -1977,7 +1977,8 @@ class GeneratedRTChecks {
19771977
// Get the best known TC estimate.
19781978
if (auto EstimatedTC = getSmallBestKnownTC(
19791979
PSE, OuterLoop, /* CanUseConstantMax = */ false))
1980-
BestTripCount = *EstimatedTC;
1980+
if (EstimatedTC->isFixed())
1981+
BestTripCount = EstimatedTC->getFixedValue();
19811982

19821983
InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
19831984

@@ -3751,12 +3752,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
37513752
}
37523753

37533754
ScalarEvolution *SE = PSE.getSE();
3754-
unsigned TC = SE->getSmallConstantTripCount(TheLoop);
3755+
ElementCount TC = SE->getSmallConstantRuntimeTripCount(TheLoop);
37553756
unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
37563757
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
3757-
if (TC != MaxTC)
3758+
if (TC != ElementCount::getFixed(MaxTC))
37583759
LLVM_DEBUG(dbgs() << "LV: Found maximum trip count: " << MaxTC << '\n');
3759-
if (TC == 1) {
3760+
if (TC.isScalar()) {
37603761
reportVectorizationFailure("Single iteration (non) loop",
37613762
"loop trip count is one, irrelevant for vectorization",
37623763
"SingleIterationLoop", ORE, TheLoop);
@@ -3870,7 +3871,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
38703871
}
38713872

38723873
auto ExpectedTC = getSmallBestKnownTC(PSE, TheLoop);
3873-
if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) {
3874+
if (ExpectedTC && ExpectedTC->isFixed() &&
3875+
ExpectedTC->getFixedValue() <=
3876+
TTI.getMinTripCountTailFoldingThreshold()) {
38743877
if (MaxPowerOf2RuntimeVF > 0u) {
38753878
// If we have a low-trip-count, and the fixed-width VF is known to divide
38763879
// the trip count but the scalable factor does not, use the fixed-width
@@ -3928,7 +3931,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39283931
return FixedScalableVFPair::getNone();
39293932
}
39303933

3931-
if (TC == 0) {
3934+
if (TC.isZero()) {
39323935
reportVectorizationFailure(
39333936
"unable to calculate the loop count due to complex control flow",
39343937
"UnknownLoopCountComplexCFG", ORE, TheLoop);
@@ -5071,13 +5074,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
50715074
// At least one iteration must be scalar when this constraint holds. So the
50725075
// maximum available iterations for interleaving is one less.
50735076
unsigned AvailableTC = requiresScalarEpilogue(VF.isVector())
5074-
? (*BestKnownTC) - 1
5075-
: *BestKnownTC;
5077+
? BestKnownTC->getFixedValue() - 1
5078+
: BestKnownTC->getFixedValue();
50765079

50775080
unsigned InterleaveCountLB = bit_floor(std::max(
50785081
1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount)));
50795082

5080-
if (PSE.getSE()->getSmallConstantTripCount(TheLoop) > 0) {
5083+
if (PSE.getSE()->getSmallConstantRuntimeTripCount(TheLoop).isNonZero()) {
50815084
// If the best known trip count is exact, we select between two
50825085
// prospective ICs, where
50835086
//
@@ -5437,8 +5440,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
54375440
// costs of comparison and induction instructions, as they'll get simplified
54385441
// away.
54395442
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
5440-
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
5441-
if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
5443+
auto TC = PSE.getSE()->getSmallConstantRuntimeTripCount(TheLoop);
5444+
if (TC == VF && !foldTailByMasking())
54425445
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
54435446
ValuesToIgnoreForVF);
54445447

@@ -7134,8 +7137,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
71347137
// simplified away.
71357138
// TODO: Remove this code after stepping away from the legacy cost model and
71367139
// adding code to simplify VPlans before calculating their costs.
7137-
auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
7138-
if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking())
7140+
auto TC = PSE.getSE()->getSmallConstantRuntimeTripCount(OrigLoop);
7141+
if (TC == VF && !CM.foldTailByMasking())
71397142
addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
71407143
CostCtx.SkipCostComputation);
71417144

@@ -9942,8 +9945,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
99429945
// Skip vectorization if the expected trip count is less than the minimum
99439946
// required trip count.
99449947
if (auto ExpectedTC = getSmallBestKnownTC(PSE, L)) {
9945-
if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC),
9946-
VF.MinProfitableTripCount)) {
9948+
if (ElementCount::isKnownLT(*ExpectedTC, VF.MinProfitableTripCount)) {
99479949
LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected "
99489950
"trip count < minimum profitable VF ("
99499951
<< *ExpectedTC << " < " << VF.MinProfitableTripCount
@@ -10300,7 +10302,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1030010302
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
1030110303
// count by optimizing for size, to minimize overheads.
1030210304
auto ExpectedTC = getSmallBestKnownTC(PSE, L);
10303-
if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
10305+
if (ExpectedTC && ExpectedTC->isFixed() &&
10306+
ExpectedTC->getFixedValue() < TinyTripCountVectorThreshold) {
1030410307
LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
1030510308
<< "This loop is worth vectorizing only if no scalar "
1030610309
<< "iteration overheads are incurred.");

0 commit comments

Comments
 (0)