@@ -427,24 +427,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
427
427
///   2) Returns expected trip count according to profile data if any.
428
428
///   3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
429
429
///   4) Returns std::nullopt if all of the above failed.
430
- static std::optional<unsigned >
430
+ static std::optional<ElementCount >
431
431
getSmallBestKnownTC (PredicatedScalarEvolution &PSE, Loop *L,
432
432
bool CanUseConstantMax = true ) {
433
433
// Check if exact trip count is known.
434
- if (unsigned ExpectedTC = PSE.getSE ()->getSmallConstantTripCount (L))
434
+ if (auto ExpectedTC = PSE.getSE ()->getSmallConstantRuntimeTripCount (L))
435
435
return ExpectedTC;
436
436
437
437
// Check if there is an expected trip count available from profile data.
438
438
if (LoopVectorizeWithBlockFrequency)
439
439
if (auto EstimatedTC = getLoopEstimatedTripCount (L))
440
- return *EstimatedTC;
440
+ return ElementCount::getFixed ( *EstimatedTC) ;
441
441
442
442
if (!CanUseConstantMax)
443
443
return std::nullopt;
444
444
445
445
// Check if upper bound estimate is known.
446
446
if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount ())
447
- return ExpectedTC;
447
+ return ElementCount::getFixed ( ExpectedTC) ;
448
448
449
449
return std::nullopt;
450
450
}
@@ -1977,7 +1977,8 @@ class GeneratedRTChecks {
1977
1977
// Get the best known TC estimate.
1978
1978
if (auto EstimatedTC = getSmallBestKnownTC (
1979
1979
PSE, OuterLoop, /* CanUseConstantMax = */ false ))
1980
- BestTripCount = *EstimatedTC;
1980
+ if (EstimatedTC->isFixed ())
1981
+ BestTripCount = EstimatedTC->getFixedValue ();
1981
1982
1982
1983
InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
1983
1984
@@ -3751,12 +3752,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3751
3752
}
3752
3753
3753
3754
ScalarEvolution *SE = PSE.getSE ();
3754
- unsigned TC = SE->getSmallConstantTripCount (TheLoop);
3755
+ ElementCount TC = SE->getSmallConstantRuntimeTripCount (TheLoop);
3755
3756
unsigned MaxTC = PSE.getSmallConstantMaxTripCount ();
3756
3757
LLVM_DEBUG (dbgs () << " LV: Found trip count: " << TC << ' \n ' );
3757
- if (TC != MaxTC)
3758
+ if (TC != ElementCount::getFixed ( MaxTC) )
3758
3759
LLVM_DEBUG (dbgs () << " LV: Found maximum trip count: " << MaxTC << ' \n ' );
3759
- if (TC == 1 ) {
3760
+ if (TC. isScalar () ) {
3760
3761
reportVectorizationFailure (" Single iteration (non) loop" ,
3761
3762
" loop trip count is one, irrelevant for vectorization" ,
3762
3763
" SingleIterationLoop" , ORE, TheLoop);
@@ -3870,7 +3871,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3870
3871
}
3871
3872
3872
3873
auto ExpectedTC = getSmallBestKnownTC (PSE, TheLoop);
3873
- if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold ()) {
3874
+ if (ExpectedTC && ExpectedTC->isFixed () &&
3875
+ ExpectedTC->getFixedValue () <=
3876
+ TTI.getMinTripCountTailFoldingThreshold ()) {
3874
3877
if (MaxPowerOf2RuntimeVF > 0u ) {
3875
3878
// If we have a low-trip-count, and the fixed-width VF is known to divide
3876
3879
// the trip count but the scalable factor does not, use the fixed-width
@@ -3928,7 +3931,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3928
3931
return FixedScalableVFPair::getNone ();
3929
3932
}
3930
3933
3931
- if (TC == 0 ) {
3934
+ if (TC. isZero () ) {
3932
3935
reportVectorizationFailure (
3933
3936
" unable to calculate the loop count due to complex control flow" ,
3934
3937
" UnknownLoopCountComplexCFG" , ORE, TheLoop);
@@ -5071,13 +5074,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
5071
5074
// At least one iteration must be scalar when this constraint holds. So the
5072
5075
// maximum available iterations for interleaving is one less.
5073
5076
unsigned AvailableTC = requiresScalarEpilogue (VF.isVector ())
5074
- ? (* BestKnownTC) - 1
5075
- : * BestKnownTC;
5077
+ ? BestKnownTC-> getFixedValue ( ) - 1
5078
+ : BestKnownTC-> getFixedValue () ;
5076
5079
5077
5080
unsigned InterleaveCountLB = bit_floor (std::max (
5078
5081
1u , std::min (AvailableTC / (EstimatedVF * 2 ), MaxInterleaveCount)));
5079
5082
5080
- if (PSE.getSE ()->getSmallConstantTripCount (TheLoop) > 0 ) {
5083
+ if (PSE.getSE ()->getSmallConstantRuntimeTripCount (TheLoop). isNonZero () ) {
5081
5084
// If the best known trip count is exact, we select between two
5082
5085
// prospective ICs, where
5083
5086
//
@@ -5437,8 +5440,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
5437
5440
// costs of comparison and induction instructions, as they'll get simplified
5438
5441
// away.
5439
5442
SmallPtrSet<Instruction *, 2 > ValuesToIgnoreForVF;
5440
- auto TC = PSE.getSE ()->getSmallConstantTripCount (TheLoop);
5441
- if (VF. isFixed () && TC == VF. getFixedValue () && !foldTailByMasking ())
5443
+ auto TC = PSE.getSE ()->getSmallConstantRuntimeTripCount (TheLoop);
5444
+ if (TC == VF && !foldTailByMasking ())
5442
5445
addFullyUnrolledInstructionsToIgnore (TheLoop, Legal->getInductionVars (),
5443
5446
ValuesToIgnoreForVF);
5444
5447
@@ -7134,8 +7137,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
7134
7137
// simplified away.
7135
7138
// TODO: Remove this code after stepping away from the legacy cost model and
7136
7139
// adding code to simplify VPlans before calculating their costs.
7137
- auto TC = PSE.getSE ()->getSmallConstantTripCount (OrigLoop);
7138
- if (VF. isFixed () && TC == VF. getFixedValue () && !CM.foldTailByMasking ())
7140
+ auto TC = PSE.getSE ()->getSmallConstantRuntimeTripCount (OrigLoop);
7141
+ if (TC == VF && !CM.foldTailByMasking ())
7139
7142
addFullyUnrolledInstructionsToIgnore (OrigLoop, Legal->getInductionVars (),
7140
7143
CostCtx.SkipCostComputation );
7141
7144
@@ -9942,8 +9945,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
9942
9945
// Skip vectorization if the expected trip count is less than the minimum
9943
9946
// required trip count.
9944
9947
if (auto ExpectedTC = getSmallBestKnownTC (PSE, L)) {
9945
- if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
9946
- VF.MinProfitableTripCount )) {
9948
+ if (ElementCount::isKnownLT (*ExpectedTC, VF.MinProfitableTripCount )) {
9947
9949
LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
9948
9950
" trip count < minimum profitable VF ("
9949
9951
<< *ExpectedTC << " < " << VF.MinProfitableTripCount
@@ -10300,7 +10302,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10300
10302
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
10301
10303
// count by optimizing for size, to minimize overheads.
10302
10304
auto ExpectedTC = getSmallBestKnownTC (PSE, L);
10303
- if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
10305
+ if (ExpectedTC && ExpectedTC->isFixed () &&
10306
+ ExpectedTC->getFixedValue () < TinyTripCountVectorThreshold) {
10304
10307
LLVM_DEBUG (dbgs () << " LV: Found a loop with a very small trip count. "
10305
10308
<< " This loop is worth vectorizing only if no scalar "
10306
10309
<< " iteration overheads are incurred." );
0 commit comments