
Commit 56bbf81

[BOLT] CDSplit main logic part 1/2 (llvm#73895)
This diff defines and initializes the auxiliary variables used by CDSplit and implements two important helper functions. The first helper function approximates the block-level size increase if a function is hot-warm split at a given split index (X86 specific). The second helper function finds, for each function BF, all calls of the form X->Y or Y->X given the function order [... X ... BF ... Y ...]. These calls are referred to as "cover calls"; their distance will decrease if BF's hot fragment size is further reduced by hot-warm splitting. NFC.
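For intuition, the cover-call test reduces to a comparison of global layout indices: the caller block's index (in X), the index of BF's entry block, and the callee entry's index (in Y). A minimal standalone sketch of that predicate, mirroring the IsCoverCall expression in the SplitFunctions.cpp diff below (the free function isCoverCall and its plain-index parameters are illustrative only, not part of the commit):

#include <cstddef>

// A call from global index SrcGI to DstGI "covers" the function whose entry
// block has global index ThisGI when that entry lies between the call's two
// endpoints in the function order, in either call direction.
static bool isCoverCall(std::size_t SrcGI, std::size_t ThisGI,
                        std::size_t DstGI) {
  return (SrcGI < ThisGI && ThisGI < DstGI) ||
         (DstGI <= ThisGI && ThisGI < SrcGI);
}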

2 files changed: +230, −5 lines

bolt/lib/Core/FunctionLayout.cpp

Lines changed: 0 additions & 4 deletions
@@ -188,10 +188,6 @@ bool FunctionLayout::update(const ArrayRef<BinaryBasicBlock *> NewLayout) {
   for (BinaryBasicBlock *const BB : NewLayout) {
     FragmentNum Num = BB->getFragmentNum();
 
-    assert(Num >= Fragments.back()->getFragmentNum() &&
-           "Blocks must be arranged such that fragments are monotonically "
-           "increasing.");
-
     // Add empty fragments if necessary
     while (Fragments.back()->getFragmentNum() < Num)
       addFragment();
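For reference, the dropped assertion enforced that fragment numbers never decrease along the incoming block order. A standalone restatement of that invariant (hypothetical helper over plain integers, for illustration only):

#include <cstddef>
#include <vector>

// Mirror of the removed check: fragment numbers must be monotonically
// non-decreasing along the new layout order.
static bool fragmentsMonotonic(const std::vector<unsigned> &FragmentNums) {
  for (std::size_t I = 1; I < FragmentNums.size(); ++I)
    if (FragmentNums[I] < FragmentNums[I - 1])
      return false;
  return true;
}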

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 230 additions & 1 deletion
@@ -109,6 +109,11 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
                    "fragment contains exactly a single basic block")),
     cl::desc("strategy used to partition blocks into fragments"),
     cl::cat(BoltOptCategory));
+
+static cl::opt<double> CallScale(
+    "call-scale",
+    cl::desc("Call score scale coefficient (when --split-strategy=cdsplit)"),
+    cl::init(0.95), cl::ReallyHidden, cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace {
@@ -140,12 +145,18 @@ struct SplitProfile2 final : public SplitStrategy {
 };
 
 struct SplitCacheDirected final : public SplitStrategy {
+  BinaryContext &BC;
   using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
 
   bool canSplit(const BinaryFunction &BF) override {
     return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
   }
 
+  explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {
+    initializeAuxiliaryVariables();
+    buildCallGraph();
+  }
+
   // When some functions are hot-warm split and others are hot-warm-cold split,
   // we do not want to change the fragment numbers of the blocks in the hot-warm
   // split functions.
@@ -173,6 +184,224 @@ struct SplitCacheDirected final : public SplitStrategy {
   }
 
 private:
+  struct JumpInfo {
+    bool HasUncondBranch = false;
+    BinaryBasicBlock *CondSuccessor = nullptr;
+    BinaryBasicBlock *UncondSuccessor = nullptr;
+  };
+
+  struct CallInfo {
+    size_t Length;
+    size_t Count;
+  };
+
+  // Auxiliary variables used by the algorithm.
+  size_t TotalNumBlocks{0};
+  size_t OrigHotSectionSize{0};
+  DenseMap<const BinaryBasicBlock *, size_t> GlobalIndices;
+  DenseMap<const BinaryBasicBlock *, size_t> BBSizes;
+  DenseMap<const BinaryBasicBlock *, size_t> BBOffsets;
+  DenseMap<const BinaryBasicBlock *, JumpInfo> JumpInfos;
+
+  // Call graph.
+  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callers;
+  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callees;
+
+  bool shouldConsiderForCallGraph(const BinaryFunction &BF) {
+    // Only a subset of the functions in the binary will be considered
+    // for initializing auxiliary variables and building call graph.
+    return BF.hasValidIndex() && BF.hasValidProfile() && !BF.empty();
+  }
+
+  void initializeAuxiliaryVariables() {
+    // Gather information about conditional and unconditional successors of
+    // each basic block; this information will be used to estimate block size
+    // increase due to hot-warm splitting.
+    auto analyzeBranches = [&](BinaryBasicBlock &BB) {
+      JumpInfo BBJumpInfo;
+      const MCSymbol *TBB = nullptr;
+      const MCSymbol *FBB = nullptr;
+      MCInst *CondBranch = nullptr;
+      MCInst *UncondBranch = nullptr;
+      if (BB.analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) {
+        BBJumpInfo.HasUncondBranch = UncondBranch != nullptr;
+        if (BB.succ_size() == 1) {
+          BBJumpInfo.UncondSuccessor = BB.getSuccessor();
+        } else if (BB.succ_size() == 2) {
+          BBJumpInfo.CondSuccessor = BB.getConditionalSuccessor(true);
+          BBJumpInfo.UncondSuccessor = BB.getConditionalSuccessor(false);
+        }
+      }
+      return BBJumpInfo;
+    };
+
+    for (BinaryFunction *BF : BC.getSortedFunctions()) {
+      if (!shouldConsiderForCallGraph(*BF))
+        continue;
+
+      // Calculate the size of each BB after hot-cold splitting.
+      // This populates BinaryBasicBlock::OutputAddressRange which
+      // can be used to compute the size of each BB.
+      BC.calculateEmittedSize(*BF, /*FixBranches=*/true);
+
+      for (BinaryBasicBlock *BB : BF->getLayout().blocks()) {
+        // Unique global index.
+        GlobalIndices[BB] = TotalNumBlocks;
+        TotalNumBlocks++;
+
+        // Block size after hot-cold splitting.
+        BBSizes[BB] = BB->getOutputSize();
+
+        // Hot block offset after hot-cold splitting.
+        BBOffsets[BB] = OrigHotSectionSize;
+        if (!BB->isSplit())
+          OrigHotSectionSize += BBSizes[BB];
+
+        // (Un)Conditional branch instruction information.
+        JumpInfos[BB] = analyzeBranches(*BB);
+      }
+    }
+  }
+
+  void buildCallGraph() {
+    Callers.resize(TotalNumBlocks);
+    Callees.resize(TotalNumBlocks);
+    for (const BinaryFunction *SrcFunction : BC.getSortedFunctions()) {
+      if (!shouldConsiderForCallGraph(*SrcFunction))
+        continue;
+
+      for (BinaryBasicBlock &SrcBB : SrcFunction->blocks()) {
+        // Skip blocks that are not executed
+        if (SrcBB.getKnownExecutionCount() == 0)
+          continue;
+
+        // Find call instructions and extract target symbols from each one
+        for (const MCInst &Inst : SrcBB) {
+          if (!BC.MIB->isCall(Inst))
+            continue;
+
+          // Call info
+          const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
+          // Ignore calls w/o information
+          if (!DstSym)
+            continue;
+
+          const BinaryFunction *DstFunction = BC.getFunctionForSymbol(DstSym);
+          // Ignore calls that do not have a valid target, but do not ignore
+          // recursive calls, because caller block could be moved to warm.
+          if (!DstFunction || DstFunction->getLayout().block_empty())
+            continue;
+
+          const BinaryBasicBlock *DstBB = &(DstFunction->front());
+
+          // Record the call only if DstBB is also in functions to consider for
+          // call graph.
+          if (GlobalIndices.contains(DstBB)) {
+            Callers[GlobalIndices[DstBB]].push_back(&SrcBB);
+            Callees[GlobalIndices[&SrcBB]].push_back(DstBB);
+          }
+        }
+      }
+    }
+  }
+
+  /// Populate BinaryBasicBlock::OutputAddressRange with estimated basic block
+  /// start and end addresses for hot and warm basic blocks, assuming hot-warm
+  /// splitting happens at \p SplitIndex. Also return estimated end addresses
+  /// of the hot fragment before and after splitting.
+  /// The estimations take into account the potential addition of branch
+  /// instructions due to split fall through branches as well as the need to
+  /// use longer branch instructions for split (un)conditional branches.
+  std::pair<size_t, size_t>
+  estimatePostSplitBBAddress(const BasicBlockOrder &BlockOrder,
+                             const size_t SplitIndex) {
+    assert(SplitIndex < BlockOrder.size() && "Invalid split index");
+
+    // Update function layout assuming hot-warm splitting at SplitIndex
+    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
+      BinaryBasicBlock *BB = BlockOrder[Index];
+      if (BB->getFragmentNum() == FragmentNum::cold())
+        break;
+      BB->setFragmentNum(Index <= SplitIndex ? FragmentNum::main()
+                                             : FragmentNum::warm());
+    }
+    BinaryFunction *BF = BlockOrder[0]->getFunction();
+    BF->getLayout().update(BlockOrder);
+    // Populate BB.OutputAddressRange under the updated layout.
+    BC.calculateEmittedSize(*BF);
+
+    // Populate BB.OutputAddressRange with estimated new start and end addresses
+    // and compute the old end address of the hot section and the new end
+    // address of the hot section.
+    size_t OldHotEndAddr;
+    size_t NewHotEndAddr;
+    size_t CurrentAddr = BBOffsets[BlockOrder[0]];
+    for (BinaryBasicBlock *BB : BlockOrder) {
+      // We only care about new addresses of blocks in hot/warm.
+      if (BB->getFragmentNum() == FragmentNum::cold())
+        break;
+      BB->setOutputStartAddress(CurrentAddr);
+      CurrentAddr += BB->getOutputSize();
+      BB->setOutputEndAddress(CurrentAddr);
+      if (BB->getLayoutIndex() == SplitIndex) {
+        NewHotEndAddr = CurrentAddr;
+        // Approximate the start address of the warm fragment of the current
+        // function using the original hot section size.
+        CurrentAddr = OrigHotSectionSize;
+      }
+      OldHotEndAddr = BBOffsets[BB] + BBSizes[BB];
+    }
+    return std::make_pair(OldHotEndAddr, NewHotEndAddr);
+  }
+
+  /// Get a collection of "shortenable" calls, that is, calls of type X->Y
+  /// when the function order is [... X ... BF ... Y ...].
+  /// If the hot fragment size of BF is reduced, then such calls are guaranteed
+  /// to get shorter by the reduced hot fragment size.
+  std::vector<CallInfo> extractCoverCalls(const BinaryFunction &BF) {
+    // Record the length and the count of the calls that can be shortened
+    std::vector<CallInfo> CoverCalls;
+    if (opts::CallScale == 0)
+      return CoverCalls;
+
+    const BinaryFunction *ThisBF = &BF;
+    const BinaryBasicBlock *ThisBB = &(ThisBF->front());
+    const size_t ThisGI = GlobalIndices[ThisBB];
+
+    for (const BinaryFunction *DstBF : BC.getSortedFunctions()) {
+      if (!shouldConsiderForCallGraph(*DstBF))
+        continue;
+
+      const BinaryBasicBlock *DstBB = &(DstBF->front());
+      if (DstBB->getKnownExecutionCount() == 0)
+        continue;
+
+      const size_t DstGI = GlobalIndices[DstBB];
+      for (const BinaryBasicBlock *SrcBB : Callers[DstGI]) {
+        const BinaryFunction *SrcBF = SrcBB->getFunction();
+        if (ThisBF == SrcBF)
+          continue;
+
+        const size_t CallCount = SrcBB->getKnownExecutionCount();
+
+        const size_t SrcGI = GlobalIndices[SrcBB];
+
+        const bool IsCoverCall = (SrcGI < ThisGI && ThisGI < DstGI) ||
+                                 (DstGI <= ThisGI && ThisGI < SrcGI);
+        if (!IsCoverCall)
+          continue;
+
+        const size_t SrcBBEndAddr = BBOffsets[SrcBB] + BBSizes[SrcBB];
+        const size_t DstBBStartAddr = BBOffsets[DstBB];
+        const size_t CallLength =
+            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
+        const CallInfo CI{CallLength, CallCount};
+        CoverCalls.emplace_back(CI);
+      }
+    }
+    return CoverCalls;
+  }
+
   /// Find the best index for splitting. The returned value is the index of the
   /// last hot basic block. Hence, "no splitting" is equivalent to returning the
   /// value which is one less than the size of the function.
@@ -308,7 +537,7 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
     // before function reordering and hot-warm-cold splitting
     // (SplitCacheDirected) after function reordering.
     if (BC.HasFinalizedFunctionOrder)
-      Strategy = std::make_unique<SplitCacheDirected>();
+      Strategy = std::make_unique<SplitCacheDirected>(BC);
     else
       Strategy = std::make_unique<SplitProfile2>();
     opts::AggressiveSplitting = true;
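The two helpers are built to compose: the difference between the two addresses returned by estimatePostSplitBBAddress is the hot-fragment size reduction for a candidate split index, and per the doc comment above, every cover call gets shorter by that reduction. A minimal sketch of that accounting (the scoring that consumes it, weighted by CallScale, is left to part 2/2; totalShortenedDistance and the local CallInfo mirror are illustrative, not part of the committed API):

#include <cstddef>
#include <vector>

// Local mirror of the CallInfo record from the diff above, so the sketch is
// self-contained.
struct CallInfo {
  std::size_t Length; // call distance before splitting
  std::size_t Count;  // dynamic execution count of the call
};

// Splitting shrinks the hot fragment by (OldHotEndAddr - NewHotEndAddr);
// each cover call's distance drops by that reduction (clamped so a call
// cannot shorten below zero), weighted by how often the call executes.
std::size_t totalShortenedDistance(std::size_t OldHotEndAddr,
                                   std::size_t NewHotEndAddr,
                                   const std::vector<CallInfo> &CoverCalls) {
  const std::size_t Reduction = OldHotEndAddr - NewHotEndAddr;
  std::size_t Saved = 0;
  for (const CallInfo &CI : CoverCalls)
    Saved += CI.Count * (CI.Length < Reduction ? CI.Length : Reduction);
  return Saved;
}

For example, a single cover call of length 0x2000 executed 100 times, under a hot-size reduction of 0x800, saves 100 * 0x800 bytes of total call distance.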
