[LV] Change getSmallBestKnownTC to return an ElementCount (NFC) #141793

paulwalker-arm · 2025-05-28T16:05:29Z

This is prep work for enabling better UF calculations when using vscale based VFs to vectorise loops with vscale based tripcounts.

NOTE: NFC because All uses remain fixed-length until a following PR changes LoopVectorize's version of getSmallConstantTripCount().

llvmbot · 2025-05-28T16:06:03Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Paul Walker (paulwalker-arm)

Changes

This is prep work for enabling better UF calculations when using vscale based VFs to vectorise loops with vscale based tripcounts.

NOTE: NFC because All uses remain fixed-length until a following PR changes getSmallConstantRuntimeTripCount().

Full diff: https://github.com/llvm/llvm-project/pull/141793.diff

3 Files Affected:

(modified) llvm/include/llvm/Analysis/ScalarEvolution.h (+4)
(modified) llvm/lib/Analysis/ScalarEvolution.cpp (+4)
(modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+23-20)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 167845ce646b9..b5faa4c479afd 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -823,6 +823,10 @@ class ScalarEvolution {
   /// than the backedge taken count for the loop.
   LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L);
 
+  /// A version of getSmallConstantTripCount that returns as an ElementCount to
+  /// include loops whose trip count is a function of llvm.vscale().
+  ElementCount getSmallConstantRuntimeTripCount(const Loop *L);
+
   /// Return the exact trip count for this loop if we exit through ExitingBlock.
   /// '0' is used to represent an unknown or non-constant trip count.  Note
   /// that a trip count is simply one more than the backedge taken count for
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4bd5a4c3ab75c..5542bf2a8fc38 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8217,6 +8217,10 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
   return getConstantTripCount(ExitCount);
 }
 
+ElementCount ScalarEvolution::getSmallConstantRuntimeTripCount(const Loop *L) {
+  return ElementCount::getFixed(getSmallConstantTripCount(L));
+}
+
 unsigned
 ScalarEvolution::getSmallConstantTripCount(const Loop *L,
                                            const BasicBlock *ExitingBlock) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2fe59a464457f..ad1c698f96f82 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -427,24 +427,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
 ///   2) Returns expected trip count according to profile data if any.
 ///   3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
 ///   4) Returns std::nullopt if all of the above failed.
-static std::optional<unsigned>
+static std::optional<ElementCount>
 getSmallBestKnownTC(PredicatedScalarEvolution &PSE, Loop *L,
                     bool CanUseConstantMax = true) {
   // Check if exact trip count is known.
-  if (unsigned ExpectedTC = PSE.getSE()->getSmallConstantTripCount(L))
+  if (auto ExpectedTC = PSE.getSE()->getSmallConstantRuntimeTripCount(L))
     return ExpectedTC;
 
   // Check if there is an expected trip count available from profile data.
   if (LoopVectorizeWithBlockFrequency)
     if (auto EstimatedTC = getLoopEstimatedTripCount(L))
-      return *EstimatedTC;
+      return ElementCount::getFixed(*EstimatedTC);
 
   if (!CanUseConstantMax)
     return std::nullopt;
 
   // Check if upper bound estimate is known.
   if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount())
-    return ExpectedTC;
+    return ElementCount::getFixed(ExpectedTC);
 
   return std::nullopt;
 }
@@ -1977,7 +1977,8 @@ class GeneratedRTChecks {
           // Get the best known TC estimate.
           if (auto EstimatedTC = getSmallBestKnownTC(
                   PSE, OuterLoop, /* CanUseConstantMax = */ false))
-            BestTripCount = *EstimatedTC;
+            if (EstimatedTC->isFixed())
+              BestTripCount = EstimatedTC->getFixedValue();
 
           InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
 
@@ -3751,12 +3752,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
   }
 
   ScalarEvolution *SE = PSE.getSE();
-  unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+  ElementCount TC = SE->getSmallConstantRuntimeTripCount(TheLoop);
   unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
   LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
-  if (TC != MaxTC)
+  if (TC != ElementCount::getFixed(MaxTC))
     LLVM_DEBUG(dbgs() << "LV: Found maximum trip count: " << MaxTC << '\n');
-  if (TC == 1) {
+  if (TC.isScalar()) {
     reportVectorizationFailure("Single iteration (non) loop",
         "loop trip count is one, irrelevant for vectorization",
         "SingleIterationLoop", ORE, TheLoop);
@@ -3870,7 +3871,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
   }
 
   auto ExpectedTC = getSmallBestKnownTC(PSE, TheLoop);
-  if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) {
+  if (ExpectedTC && ExpectedTC->isFixed() &&
+      ExpectedTC->getFixedValue() <=
+          TTI.getMinTripCountTailFoldingThreshold()) {
     if (MaxPowerOf2RuntimeVF > 0u) {
       // If we have a low-trip-count, and the fixed-width VF is known to divide
       // the trip count but the scalable factor does not, use the fixed-width
@@ -3928,7 +3931,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     return FixedScalableVFPair::getNone();
   }
 
-  if (TC == 0) {
+  if (TC.isZero()) {
     reportVectorizationFailure(
         "unable to calculate the loop count due to complex control flow",
         "UnknownLoopCountComplexCFG", ORE, TheLoop);
@@ -5071,13 +5074,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
     // At least one iteration must be scalar when this constraint holds. So the
     // maximum available iterations for interleaving is one less.
     unsigned AvailableTC = requiresScalarEpilogue(VF.isVector())
-                               ? (*BestKnownTC) - 1
-                               : *BestKnownTC;
+                               ? BestKnownTC->getFixedValue() - 1
+                               : BestKnownTC->getFixedValue();
 
     unsigned InterleaveCountLB = bit_floor(std::max(
         1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount)));
 
-    if (PSE.getSE()->getSmallConstantTripCount(TheLoop) > 0) {
+    if (PSE.getSE()->getSmallConstantRuntimeTripCount(TheLoop).isNonZero()) {
       // If the best known trip count is exact, we select between two
       // prospective ICs, where
       //
@@ -5437,8 +5440,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   // costs of comparison and induction instructions, as they'll get simplified
   // away.
   SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
-  auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
-  if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
+  auto TC = PSE.getSE()->getSmallConstantRuntimeTripCount(TheLoop);
+  if (TC == VF && !foldTailByMasking())
     addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
                                          ValuesToIgnoreForVF);
 
@@ -7134,8 +7137,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
     // simplified away.
     // TODO: Remove this code after stepping away from the legacy cost model and
     // adding code to simplify VPlans before calculating their costs.
-    auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
-    if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking())
+    auto TC = PSE.getSE()->getSmallConstantRuntimeTripCount(OrigLoop);
+    if (TC == VF && !CM.foldTailByMasking())
       addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
                                            CostCtx.SkipCostComputation);
 
@@ -9942,8 +9945,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
   // Skip vectorization if the expected trip count is less than the minimum
   // required trip count.
   if (auto ExpectedTC = getSmallBestKnownTC(PSE, L)) {
-    if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC),
-                                VF.MinProfitableTripCount)) {
+    if (ElementCount::isKnownLT(*ExpectedTC, VF.MinProfitableTripCount)) {
       LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected "
                            "trip count < minimum profitable VF ("
                         << *ExpectedTC << " < " << VF.MinProfitableTripCount
@@ -10300,7 +10302,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   // Check the loop for a trip count threshold: vectorize loops with a tiny trip
   // count by optimizing for size, to minimize overheads.
   auto ExpectedTC = getSmallBestKnownTC(PSE, L);
-  if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
+  if (ExpectedTC && ExpectedTC->isFixed() &&
+      ExpectedTC->getFixedValue() < TinyTripCountVectorThreshold) {
     LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
                       << "This loop is worth vectorizing only if no scalar "
                       << "iteration overheads are incurred.");

llvmbot · 2025-05-28T16:06:04Z

@llvm/pr-subscribers-vectorizers

Author: Paul Walker (paulwalker-arm)

Changes

This is prep work for enabling better UF calculations when using vscale based VFs to vectorise loops with vscale based tripcounts.

NOTE: NFC because All uses remain fixed-length until a following PR changes getSmallConstantRuntimeTripCount().

Full diff: https://github.com/llvm/llvm-project/pull/141793.diff

3 Files Affected:

(modified) llvm/include/llvm/Analysis/ScalarEvolution.h (+4)
(modified) llvm/lib/Analysis/ScalarEvolution.cpp (+4)
(modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+23-20)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 167845ce646b9..b5faa4c479afd 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -823,6 +823,10 @@ class ScalarEvolution {
   /// than the backedge taken count for the loop.
   LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L);
 
+  /// A version of getSmallConstantTripCount that returns as an ElementCount to
+  /// include loops whose trip count is a function of llvm.vscale().
+  ElementCount getSmallConstantRuntimeTripCount(const Loop *L);
+
   /// Return the exact trip count for this loop if we exit through ExitingBlock.
   /// '0' is used to represent an unknown or non-constant trip count.  Note
   /// that a trip count is simply one more than the backedge taken count for
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4bd5a4c3ab75c..5542bf2a8fc38 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8217,6 +8217,10 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
   return getConstantTripCount(ExitCount);
 }
 
+ElementCount ScalarEvolution::getSmallConstantRuntimeTripCount(const Loop *L) {
+  return ElementCount::getFixed(getSmallConstantTripCount(L));
+}
+
 unsigned
 ScalarEvolution::getSmallConstantTripCount(const Loop *L,
                                            const BasicBlock *ExitingBlock) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2fe59a464457f..ad1c698f96f82 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -427,24 +427,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
 ///   2) Returns expected trip count according to profile data if any.
 ///   3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
 ///   4) Returns std::nullopt if all of the above failed.
-static std::optional<unsigned>
+static std::optional<ElementCount>
 getSmallBestKnownTC(PredicatedScalarEvolution &PSE, Loop *L,
                     bool CanUseConstantMax = true) {
   // Check if exact trip count is known.
-  if (unsigned ExpectedTC = PSE.getSE()->getSmallConstantTripCount(L))
+  if (auto ExpectedTC = PSE.getSE()->getSmallConstantRuntimeTripCount(L))
     return ExpectedTC;
 
   // Check if there is an expected trip count available from profile data.
   if (LoopVectorizeWithBlockFrequency)
     if (auto EstimatedTC = getLoopEstimatedTripCount(L))
-      return *EstimatedTC;
+      return ElementCount::getFixed(*EstimatedTC);
 
   if (!CanUseConstantMax)
     return std::nullopt;
 
   // Check if upper bound estimate is known.
   if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount())
-    return ExpectedTC;
+    return ElementCount::getFixed(ExpectedTC);
 
   return std::nullopt;
 }
@@ -1977,7 +1977,8 @@ class GeneratedRTChecks {
           // Get the best known TC estimate.
           if (auto EstimatedTC = getSmallBestKnownTC(
                   PSE, OuterLoop, /* CanUseConstantMax = */ false))
-            BestTripCount = *EstimatedTC;
+            if (EstimatedTC->isFixed())
+              BestTripCount = EstimatedTC->getFixedValue();
 
           InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
 
@@ -3751,12 +3752,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
   }
 
   ScalarEvolution *SE = PSE.getSE();
-  unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+  ElementCount TC = SE->getSmallConstantRuntimeTripCount(TheLoop);
   unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
   LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
-  if (TC != MaxTC)
+  if (TC != ElementCount::getFixed(MaxTC))
     LLVM_DEBUG(dbgs() << "LV: Found maximum trip count: " << MaxTC << '\n');
-  if (TC == 1) {
+  if (TC.isScalar()) {
     reportVectorizationFailure("Single iteration (non) loop",
         "loop trip count is one, irrelevant for vectorization",
         "SingleIterationLoop", ORE, TheLoop);
@@ -3870,7 +3871,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
   }
 
   auto ExpectedTC = getSmallBestKnownTC(PSE, TheLoop);
-  if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) {
+  if (ExpectedTC && ExpectedTC->isFixed() &&
+      ExpectedTC->getFixedValue() <=
+          TTI.getMinTripCountTailFoldingThreshold()) {
     if (MaxPowerOf2RuntimeVF > 0u) {
       // If we have a low-trip-count, and the fixed-width VF is known to divide
       // the trip count but the scalable factor does not, use the fixed-width
@@ -3928,7 +3931,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     return FixedScalableVFPair::getNone();
   }
 
-  if (TC == 0) {
+  if (TC.isZero()) {
     reportVectorizationFailure(
         "unable to calculate the loop count due to complex control flow",
         "UnknownLoopCountComplexCFG", ORE, TheLoop);
@@ -5071,13 +5074,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
     // At least one iteration must be scalar when this constraint holds. So the
     // maximum available iterations for interleaving is one less.
     unsigned AvailableTC = requiresScalarEpilogue(VF.isVector())
-                               ? (*BestKnownTC) - 1
-                               : *BestKnownTC;
+                               ? BestKnownTC->getFixedValue() - 1
+                               : BestKnownTC->getFixedValue();
 
     unsigned InterleaveCountLB = bit_floor(std::max(
         1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount)));
 
-    if (PSE.getSE()->getSmallConstantTripCount(TheLoop) > 0) {
+    if (PSE.getSE()->getSmallConstantRuntimeTripCount(TheLoop).isNonZero()) {
       // If the best known trip count is exact, we select between two
       // prospective ICs, where
       //
@@ -5437,8 +5440,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   // costs of comparison and induction instructions, as they'll get simplified
   // away.
   SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
-  auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
-  if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
+  auto TC = PSE.getSE()->getSmallConstantRuntimeTripCount(TheLoop);
+  if (TC == VF && !foldTailByMasking())
     addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
                                          ValuesToIgnoreForVF);
 
@@ -7134,8 +7137,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
     // simplified away.
     // TODO: Remove this code after stepping away from the legacy cost model and
     // adding code to simplify VPlans before calculating their costs.
-    auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
-    if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking())
+    auto TC = PSE.getSE()->getSmallConstantRuntimeTripCount(OrigLoop);
+    if (TC == VF && !CM.foldTailByMasking())
       addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
                                            CostCtx.SkipCostComputation);
 
@@ -9942,8 +9945,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
   // Skip vectorization if the expected trip count is less than the minimum
   // required trip count.
   if (auto ExpectedTC = getSmallBestKnownTC(PSE, L)) {
-    if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC),
-                                VF.MinProfitableTripCount)) {
+    if (ElementCount::isKnownLT(*ExpectedTC, VF.MinProfitableTripCount)) {
       LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected "
                            "trip count < minimum profitable VF ("
                         << *ExpectedTC << " < " << VF.MinProfitableTripCount
@@ -10300,7 +10302,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   // Check the loop for a trip count threshold: vectorize loops with a tiny trip
   // count by optimizing for size, to minimize overheads.
   auto ExpectedTC = getSmallBestKnownTC(PSE, L);
-  if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
+  if (ExpectedTC && ExpectedTC->isFixed() &&
+      ExpectedTC->getFixedValue() < TinyTripCountVectorThreshold) {
     LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
                       << "This loop is worth vectorizing only if no scalar "
                       << "iteration overheads are incurred.");

david-arm

LGTM with the minor comment addressed!

david-arm · 2025-06-02T12:28:27Z

llvm/include/llvm/Analysis/ScalarEvolution.h

@@ -823,6 +823,10 @@ class ScalarEvolution {
  /// than the backedge taken count for the loop.
  LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L);

+  /// A version of getSmallConstantTripCount that returns as an ElementCount to
+  /// include loops whose trip count is a function of llvm.vscale().
+  ElementCount getSmallConstantRuntimeTripCount(const Loop *L);


I think this need the same LLVM_ABI prefix as above.

artagnon · 2025-06-03T19:46:54Z

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

 getSmallBestKnownTC(PredicatedScalarEvolution &PSE, Loop *L,
                    bool CanUseConstantMax = true) {


Is it sufficient to change getSmallBestKnownTC to return ElementCount, and not touch SCEV?

I don't think so because there are other places within LoopVectorize where getSmallConstantTripCount is called that also require the ElementCount returning variant.

Would be good to see the follow-up patch, to see what this patch will be used?

It's part of a chain so I'll see about pulling it out.

My objective is to allow multiples of vscale to be returned so that when vectorising a loop of the form for (int i = 0; i < llvm.vscale() * N; ++i) using a scalable VF the vectoriser can make sensible interleaving decisions. For example, today for the N=1 case LoopVectorize picks the default interleaving factor of 2 which means we never enter the vector loop.

In the same vein, representing vscale base trip counts also allows the vectoriser to better reason about whether the scalar loop becomes dead after vectorisation.

I did investigate the possibility of changing the existing interface but felt the impact was too high, especially as the majority of uses do not care about vscale based trip counts.

Sounds good. Maybe we can move getSmallConstantRuntimeTC into LV, as nobody else cares about vscale?

That causes a little duplication but otherwise works well so I've implemented the recommendation. As this is now LoopVectorize's version of getSmallConstantTripCount() I've adopted the same name.

…ementCount. This is prep work for enabling better UF calculations when using vscale based VFs to vectorise loops with vscale based tripcounts. NOTE: NFC because All uses remain fixed-length until a following PR changes LoopVectorize's version of getSmallConstantTripCount().

david-arm · 2025-06-17T14:47:47Z

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

@@ -419,6 +419,12 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
  return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
 }

+/// A version of ScalarEvolution::getSmallConstantTripCount that returns an
+/// ElementCount to include loops whose trip count is a function of vscale.
+ElementCount getSmallConstantTripCount(ScalarEvolution *SE, const Loop *L) {


Can this be marked static or do you expect it to be called from elsewhere?

fhahn

LGTM, thanks

llvm-ci · 2025-06-18T12:29:47Z

LLVM Buildbot has detected a new failure on builder premerge-monolithic-linux running on premerge-linux-1 while building llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/35204

Here is the relevant piece of the build log for the reference

Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: tools/llvm-exegesis/RISCV/rvv/filter.test' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
/build/buildbot/premerge-monolithic-linux/build/bin/llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK     --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=10 | /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test # RUN: at line 1
+ /build/buildbot/premerge-monolithic-linux/build/bin/llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK '--riscv-filter-config=vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=10
+ /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
PseudoVNCLIPU_WX_M1_MASK: Failed to produce any snippet via: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, one unique register for each position
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test

--

********************

…#141793) This is prep work for enabling better UF calculations when using vscale based VFs to vectorise loops with vscale based tripcounts. NOTE: NFC because All uses remain fixed-length until a following PR changes LoopVectorize's version of getSmallConstantTripCount().

paulwalker-arm requested a review from nikic as a code owner May 28, 2025 16:05

llvmbot added vectorizers llvm:analysis llvm:transforms labels May 28, 2025

paulwalker-arm requested review from fhahn and david-arm May 28, 2025 16:26

david-arm approved these changes Jun 2, 2025

View reviewed changes

artagnon changed the title ~~[NFC][LLVM][LoopVectorize] Change getSmallBestKnownTC to return an ElementCount.~~ [LV] Change getSmallBestKnownTC to return an ElementCount (NFC) Jun 3, 2025

artagnon reviewed Jun 3, 2025

View reviewed changes

paulwalker-arm force-pushed the lv-vscale-based-tc-loops-refactor branch from 6db4524 to 8af01ca Compare June 17, 2025 14:07

david-arm reviewed Jun 17, 2025

View reviewed changes

Make getSmallConstantTripCount a static function.

29a79f1

fhahn approved these changes Jun 17, 2025

View reviewed changes

paulwalker-arm merged commit d3441f7 into llvm:main Jun 18, 2025
7 checks passed

paulwalker-arm deleted the lv-vscale-based-tc-loops-refactor branch June 18, 2025 10:45

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[LV] Change getSmallBestKnownTC to return an ElementCount (NFC) #141793

[LV] Change getSmallBestKnownTC to return an ElementCount (NFC) #141793

Uh oh!

paulwalker-arm commented May 28, 2025 •

edited

Loading

Uh oh!

llvmbot commented May 28, 2025 •

edited

Loading

Uh oh!

llvmbot commented May 28, 2025

Uh oh!

david-arm left a comment

Uh oh!

david-arm Jun 2, 2025

Uh oh!

artagnon Jun 3, 2025

Uh oh!

paulwalker-arm Jun 4, 2025

Uh oh!

artagnon Jun 6, 2025

Uh oh!

paulwalker-arm Jun 6, 2025

Uh oh!

artagnon Jun 6, 2025

Uh oh!

paulwalker-arm Jun 17, 2025

Uh oh!

david-arm Jun 17, 2025

Uh oh!

fhahn left a comment

Uh oh!

Uh oh!

llvm-ci commented Jun 18, 2025

Uh oh!

Uh oh!

		getSmallBestKnownTC(PredicatedScalarEvolution &PSE, Loop *L,
		bool CanUseConstantMax = true) {

[LV] Change getSmallBestKnownTC to return an ElementCount (NFC) #141793

[LV] Change getSmallBestKnownTC to return an ElementCount (NFC) #141793

Uh oh!

Conversation

paulwalker-arm commented May 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 28, 2025

Uh oh!

david-arm left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Jun 18, 2025

Uh oh!

Uh oh!

paulwalker-arm commented May 28, 2025 •

edited

Loading

llvmbot commented May 28, 2025 •

edited

Loading