-
Notifications
You must be signed in to change notification settings - Fork 14k
[VPlan] Connect Entry to scalar preheader during initial construction. #140132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d481278
76212e6
a324d27
2ad3e76
f4a28d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -490,7 +490,7 @@ class InnerLoopVectorizer { | |||||||
MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor), | ||||||||
Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI), | ||||||||
RTChecks(RTChecks), Plan(Plan), | ||||||||
VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {} | ||||||||
VectorPHVPB(Plan.getEntry()->getSuccessors()[1]) {} | ||||||||
|
||||||||
virtual ~InnerLoopVectorizer() = default; | ||||||||
|
||||||||
|
@@ -2365,14 +2365,11 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { | |||||||
void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) { | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clarify that CheckBlock now excludes the initial trip-count check, which is expected to be already introduced before calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a note, thanks |
||||||||
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||||||||
VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor(); | ||||||||
if (PreVectorPH->getNumSuccessors() != 1) { | ||||||||
assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); | ||||||||
assert(PreVectorPH->getSuccessors()[0] == ScalarPH && | ||||||||
"Unexpected successor"); | ||||||||
VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB); | ||||||||
VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB); | ||||||||
PreVectorPH = CheckVPIRBB; | ||||||||
} | ||||||||
assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); | ||||||||
assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor"); | ||||||||
VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB); | ||||||||
VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB); | ||||||||
PreVectorPH = CheckVPIRBB; | ||||||||
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH); | ||||||||
PreVectorPH->swapSuccessors(); | ||||||||
|
||||||||
|
@@ -2463,9 +2460,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { | |||||||
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||||||||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||||||||
LoopBypassBlocks.push_back(TCCheckBlock); | ||||||||
|
||||||||
// TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here. | ||||||||
introduceCheckBlockInVPlan(TCCheckBlock); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Connecting entry VPBB to scalar preheader VPBB is done here as part of introducing the conditional branch at end of entry IRBB? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TCCheckBlock is the same block as the VPlan entry block. Before the change, we needed special logic to not create a new VPIRBB for this call. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Worth an assert to clarify? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||||||||
} | ||||||||
|
||||||||
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { | ||||||||
|
@@ -7837,7 +7831,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |||||||
|
||||||||
// 1. Set up the skeleton for vectorization, including vector pre-header and | ||||||||
// middle block. The vector loop is created during VPlan execution. | ||||||||
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor()); | ||||||||
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]); | ||||||||
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); | ||||||||
if (VectorizingEpilogue) | ||||||||
VPlanTransforms::removeDeadRecipes(BestVPlan); | ||||||||
|
@@ -8070,7 +8064,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, | |||||||
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||||||||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||||||||
|
||||||||
introduceCheckBlockInVPlan(TCCheckBlock); | ||||||||
if (!ForEpilogue) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||||||||
introduceCheckBlockInVPlan(TCCheckBlock); | ||||||||
return TCCheckBlock; | ||||||||
} | ||||||||
|
||||||||
|
@@ -8200,7 +8195,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( | |||||||
Plan.setEntry(NewEntry); | ||||||||
// OldEntry is now dead and will be cleaned up when the plan gets destroyed. | ||||||||
|
||||||||
introduceCheckBlockInVPlan(Insert); | ||||||||
return Insert; | ||||||||
} | ||||||||
|
||||||||
|
@@ -9160,7 +9154,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, | |||||||
DenseMap<VPValue *, VPValue *> &IVEndValues) { | ||||||||
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); | ||||||||
auto *ScalarPH = Plan.getScalarPreheader(); | ||||||||
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor()); | ||||||||
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]); | ||||||||
Comment on lines
8792
to
+8793
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggesting a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, will check where this could be useful separately. |
||||||||
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); | ||||||||
VPBuilder VectorPHBuilder( | ||||||||
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())); | ||||||||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -4246,7 +4246,8 @@ class VPlan { | |||||||||
/// that this relies on unneeded branches to the scalar tail loop being | ||||||||||
/// removed. | ||||||||||
bool hasScalarTail() const { | ||||||||||
return getScalarPreheader()->getNumPredecessors() != 0; | ||||||||||
return getScalarPreheader()->getNumPredecessors() != 0 && | ||||||||||
getScalarPreheader()->getSinglePredecessor() != getEntry(); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps clearer as:
Suggested change
I.e., scalar loop does not take care of leftover iterations if (a) it doesn't take care of anything, or (b) it only takes care of all iterations as an alternative to the vector loop? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated thanks |
||||||||||
} | ||||||||||
}; | ||||||||||
|
||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -546,6 +546,9 @@ void VPlanTransforms::prepareForVectorization( | |
if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent: worth clarifying that not requiring a scalar epilog check means the scalar epilog is (always) required, i.e., case 1. |
||
VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB); | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
Plan.getEntry()->swapSuccessors(); | ||
|
||
// The exit blocks are unreachable, remove their recipes to make sure no | ||
// users remain that may pessimize transforms. | ||
for (auto *EB : Plan.getExitBlocks()) { | ||
|
@@ -558,6 +561,9 @@ void VPlanTransforms::prepareForVectorization( | |
// The connection order corresponds to the operands of the conditional branch, | ||
// with the middle block already connected to the exit block. | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
// Also connect the entry block to the scalar preheader. | ||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The CFG is less consistent with VPlan's recipes, connecting entry VPBB to scalarPH VPBB now, when it is already connected to vectorPH, w/o introducing a conditional branch recipe at the end of entry - the branch instruction is generated outside VPlan's execute. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, we still need to allow terminator-less VPIRBBs for various parts of the skeleton. One of the next steps after adding it early here is adding a branch recipe. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Temporary inconsistency is worth clarifying in a TODO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||
Plan.getEntry()->swapSuccessors(); | ||
|
||
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); | ||
// Here we use the same DebugLoc as the scalar loop latch terminator instead | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1732,7 +1732,7 @@ static void removeBranchOnCondTrue(VPlan &Plan) { | |
using namespace llvm::VPlanPatternMatch; | ||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( | ||
vp_depth_first_shallow(Plan.getEntry()))) { | ||
if (VPBB->getNumSuccessors() != 2 || | ||
if (VPBB->getNumSuccessors() != 2 || isa<VPIRBasicBlock>(VPBB) || | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Excluding VPIRBB's only because of the temporary terminator-less entry block with two successors? If so, suffice to exclude There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! We don't run removeBranchOnCond after introducing the other, terminator-less VPIRBBs. |
||
!match(&VPBB->back(), m_BranchOnCond(m_True()))) | ||
continue; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So now Plan->entry has ScalarPH as its first successor and VectorPH as its second, rather than the latter as its only successor.
Good to assert there are two successors. Perhaps some API to retrieve "1of2" and "2of2", analogous to existing "1of1"?
Better look instead for first predecessor (among two) of first header block? Possibly as follow-up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to get the single predecessor of the vector loop region, which should be clearer, thanks