@@ -109,6 +109,11 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
        "fragment contains exactly a single basic block")),
    cl::desc("strategy used to partition blocks into fragments"),
    cl::cat(BoltOptCategory));
+
+static cl::opt<double> CallScale(
+    "call-scale",
+    cl::desc("Call score scale coefficient (when --split-strategy=cdsplit)"),
+    cl::init(0.95), cl::ReallyHidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace {
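
Note on the option added above: per its cl::desc string, CallScale is a scale coefficient for the call score and only takes effect when --split-strategy=cdsplit is selected; it defaults to 0.95 and is marked ReallyHidden, so it is a tuning knob rather than a user-facing flag. A hypothetical invocation exercising it (assuming BOLT's existing --split-functions flag; binary names are placeholders) might look like:

    llvm-bolt app -o app.bolt --split-functions --split-strategy=cdsplit --call-scale=0.8

Setting the value to 0 disables cover-call scoring entirely, since extractCoverCalls() introduced below returns early in that case.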
@@ -140,12 +145,18 @@ struct SplitProfile2 final : public SplitStrategy {
};

struct SplitCacheDirected final : public SplitStrategy {
+  BinaryContext &BC;
  using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;

  bool canSplit(const BinaryFunction &BF) override {
    return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
  }

+  explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {
+    initializeAuxiliaryVariables();
+    buildCallGraph();
+  }
+
  // When some functions are hot-warm split and others are hot-warm-cold split,
  // we do not want to change the fragment numbers of the blocks in the hot-warm
  // split functions.
@@ -173,6 +184,224 @@ struct SplitCacheDirected final : public SplitStrategy {
  }

private:
+  struct JumpInfo {
+    bool HasUncondBranch = false;
+    BinaryBasicBlock *CondSuccessor = nullptr;
+    BinaryBasicBlock *UncondSuccessor = nullptr;
+  };
+
+  struct CallInfo {
+    size_t Length;
+    size_t Count;
+  };
+
+  // Auxiliary variables used by the algorithm.
+  size_t TotalNumBlocks{0};
+  size_t OrigHotSectionSize{0};
+  DenseMap<const BinaryBasicBlock *, size_t> GlobalIndices;
+  DenseMap<const BinaryBasicBlock *, size_t> BBSizes;
+  DenseMap<const BinaryBasicBlock *, size_t> BBOffsets;
+  DenseMap<const BinaryBasicBlock *, JumpInfo> JumpInfos;
+
+  // Call graph.
+  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callers;
+  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callees;
+
+  bool shouldConsiderForCallGraph(const BinaryFunction &BF) {
+    // Only a subset of the functions in the binary will be considered
+    // for initializing auxiliary variables and building call graph.
+    return BF.hasValidIndex() && BF.hasValidProfile() && !BF.empty();
+  }
+
+  void initializeAuxiliaryVariables() {
+    // Gather information about conditional and unconditional successors of
+    // each basic block; this information will be used to estimate block size
+    // increase due to hot-warm splitting.
+    auto analyzeBranches = [&](BinaryBasicBlock &BB) {
+      JumpInfo BBJumpInfo;
+      const MCSymbol *TBB = nullptr;
+      const MCSymbol *FBB = nullptr;
+      MCInst *CondBranch = nullptr;
+      MCInst *UncondBranch = nullptr;
+      if (BB.analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) {
+        BBJumpInfo.HasUncondBranch = UncondBranch != nullptr;
+        if (BB.succ_size() == 1) {
+          BBJumpInfo.UncondSuccessor = BB.getSuccessor();
+        } else if (BB.succ_size() == 2) {
+          BBJumpInfo.CondSuccessor = BB.getConditionalSuccessor(true);
+          BBJumpInfo.UncondSuccessor = BB.getConditionalSuccessor(false);
+        }
+      }
+      return BBJumpInfo;
+    };
+
+    for (BinaryFunction *BF : BC.getSortedFunctions()) {
+      if (!shouldConsiderForCallGraph(*BF))
+        continue;
+
+      // Calculate the size of each BB after hot-cold splitting.
+      // This populates BinaryBasicBlock::OutputAddressRange which
+      // can be used to compute the size of each BB.
+      BC.calculateEmittedSize(*BF, /*FixBranches=*/true);
+
+      for (BinaryBasicBlock *BB : BF->getLayout().blocks()) {
+        // Unique global index.
+        GlobalIndices[BB] = TotalNumBlocks;
+        TotalNumBlocks++;
+
+        // Block size after hot-cold splitting.
+        BBSizes[BB] = BB->getOutputSize();
+
+        // Hot block offset after hot-cold splitting.
+        BBOffsets[BB] = OrigHotSectionSize;
+        if (!BB->isSplit())
+          OrigHotSectionSize += BBSizes[BB];
+
+        // (Un)Conditional branch instruction information.
+        JumpInfos[BB] = analyzeBranches(*BB);
+      }
+    }
+  }
+
+  void buildCallGraph() {
+    Callers.resize(TotalNumBlocks);
+    Callees.resize(TotalNumBlocks);
+    for (const BinaryFunction *SrcFunction : BC.getSortedFunctions()) {
+      if (!shouldConsiderForCallGraph(*SrcFunction))
+        continue;
+
+      for (BinaryBasicBlock &SrcBB : SrcFunction->blocks()) {
+        // Skip blocks that are not executed
+        if (SrcBB.getKnownExecutionCount() == 0)
+          continue;
+
+        // Find call instructions and extract target symbols from each one
+        for (const MCInst &Inst : SrcBB) {
+          if (!BC.MIB->isCall(Inst))
+            continue;
+
+          // Call info
+          const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
+          // Ignore calls w/o information
+          if (!DstSym)
+            continue;
+
+          const BinaryFunction *DstFunction = BC.getFunctionForSymbol(DstSym);
+          // Ignore calls that do not have a valid target, but do not ignore
+          // recursive calls, because caller block could be moved to warm.
+          if (!DstFunction || DstFunction->getLayout().block_empty())
+            continue;
+
+          const BinaryBasicBlock *DstBB = &(DstFunction->front());
+
+          // Record the call only if DstBB is also in functions to consider for
+          // call graph.
+          if (GlobalIndices.contains(DstBB)) {
+            Callers[GlobalIndices[DstBB]].push_back(&SrcBB);
+            Callees[GlobalIndices[&SrcBB]].push_back(DstBB);
+          }
+        }
+      }
+    }
+  }
+
+  /// Populate BinaryBasicBlock::OutputAddressRange with estimated basic block
+  /// start and end addresses for hot and warm basic blocks, assuming hot-warm
+  /// splitting happens at \p SplitIndex. Also return estimated end addresses
+  /// of the hot fragment before and after splitting.
+  /// The estimations take into account the potential addition of branch
+  /// instructions due to split fall through branches as well as the need to
+  /// use longer branch instructions for split (un)conditional branches.
+  std::pair<size_t, size_t>
+  estimatePostSplitBBAddress(const BasicBlockOrder &BlockOrder,
+                             const size_t SplitIndex) {
+    assert(SplitIndex < BlockOrder.size() && "Invalid split index");
+
+    // Update function layout assuming hot-warm splitting at SplitIndex
+    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
+      BinaryBasicBlock *BB = BlockOrder[Index];
+      if (BB->getFragmentNum() == FragmentNum::cold())
+        break;
+      BB->setFragmentNum(Index <= SplitIndex ? FragmentNum::main()
+                                             : FragmentNum::warm());
+    }
+    BinaryFunction *BF = BlockOrder[0]->getFunction();
+    BF->getLayout().update(BlockOrder);
+    // Populate BB.OutputAddressRange under the updated layout.
+    BC.calculateEmittedSize(*BF);
+
+    // Populate BB.OutputAddressRange with estimated new start and end addresses
+    // and compute the old end address of the hot section and the new end
+    // address of the hot section.
+    size_t OldHotEndAddr;
+    size_t NewHotEndAddr;
+    size_t CurrentAddr = BBOffsets[BlockOrder[0]];
+    for (BinaryBasicBlock *BB : BlockOrder) {
+      // We only care about new addresses of blocks in hot/warm.
+      if (BB->getFragmentNum() == FragmentNum::cold())
+        break;
+      BB->setOutputStartAddress(CurrentAddr);
+      CurrentAddr += BB->getOutputSize();
+      BB->setOutputEndAddress(CurrentAddr);
+      if (BB->getLayoutIndex() == SplitIndex) {
+        NewHotEndAddr = CurrentAddr;
+        // Approximate the start address of the warm fragment of the current
+        // function using the original hot section size.
+        CurrentAddr = OrigHotSectionSize;
+      }
+      OldHotEndAddr = BBOffsets[BB] + BBSizes[BB];
+    }
+    return std::make_pair(OldHotEndAddr, NewHotEndAddr);
+  }
+
+  /// Get a collection of "shortenable" calls, that is, calls of type X->Y
+  /// when the function order is [... X ... BF ... Y ...].
+  /// If the hot fragment size of BF is reduced, then such calls are guaranteed
+  /// to get shorter by the reduced hot fragment size.
+  std::vector<CallInfo> extractCoverCalls(const BinaryFunction &BF) {
+    // Record the length and the count of the calls that can be shortened
+    std::vector<CallInfo> CoverCalls;
+    if (opts::CallScale == 0)
+      return CoverCalls;
+
+    const BinaryFunction *ThisBF = &BF;
+    const BinaryBasicBlock *ThisBB = &(ThisBF->front());
+    const size_t ThisGI = GlobalIndices[ThisBB];
+
+    for (const BinaryFunction *DstBF : BC.getSortedFunctions()) {
+      if (!shouldConsiderForCallGraph(*DstBF))
+        continue;
+
+      const BinaryBasicBlock *DstBB = &(DstBF->front());
+      if (DstBB->getKnownExecutionCount() == 0)
+        continue;
+
+      const size_t DstGI = GlobalIndices[DstBB];
+      for (const BinaryBasicBlock *SrcBB : Callers[DstGI]) {
+        const BinaryFunction *SrcBF = SrcBB->getFunction();
+        if (ThisBF == SrcBF)
+          continue;
+
+        const size_t CallCount = SrcBB->getKnownExecutionCount();
+
+        const size_t SrcGI = GlobalIndices[SrcBB];
+
+        const bool IsCoverCall = (SrcGI < ThisGI && ThisGI < DstGI) ||
+                                 (DstGI <= ThisGI && ThisGI < SrcGI);
+        if (!IsCoverCall)
+          continue;
+
+        const size_t SrcBBEndAddr = BBOffsets[SrcBB] + BBSizes[SrcBB];
+        const size_t DstBBStartAddr = BBOffsets[DstBB];
+        const size_t CallLength =
+            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
+        const CallInfo CI{CallLength, CallCount};
+        CoverCalls.emplace_back(CI);
+      }
+    }
+    return CoverCalls;
+  }
+
  /// Find the best index for splitting. The returned value is the index of the
  /// last hot basic block. Hence, "no splitting" is equivalent to returning the
  /// value which is one less than the size of the function.
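
To make the cover-call condition in extractCoverCalls() above concrete, here is a small standalone sketch (an illustration, not part of this patch) that applies the same predicate to made-up global block indices. With the function order [... X ... BF ... Y ...], a call X->Y covers BF when BF's entry index falls between the caller block and the callee entry in layout order (in either direction, per the two disjuncts in the code), so shrinking BF's hot fragment (roughly OldHotEndAddr - NewHotEndAddr bytes, as estimated by estimatePostSplitBBAddress()) shortens every such call by that amount.

    // Standalone illustration only: the IsCoverCall predicate from
    // extractCoverCalls(), applied to hypothetical global block indices.
    #include <cstddef>
    #include <cstdio>

    static bool isCoverCall(std::size_t SrcGI, std::size_t ThisGI,
                            std::size_t DstGI) {
      // Same condition as in the pass: the current function's entry sits
      // between the caller block and the callee entry in layout order.
      return (SrcGI < ThisGI && ThisGI < DstGI) ||
             (DstGI <= ThisGI && ThisGI < SrcGI);
    }

    int main() {
      std::printf("%d\n", isCoverCall(1, 5, 9)); // forward call over BF: 1
      std::printf("%d\n", isCoverCall(9, 5, 1)); // backward call over BF: 1
      std::printf("%d\n", isCoverCall(1, 5, 3)); // callee before BF: 0
    }

The recorded CallInfo{Length, Count} pairs presumably feed the split-point scoring that CallScale scales; that scoring code is outside this excerpt.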
@@ -308,7 +537,7 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
    // before function reordering and hot-warm-cold splitting
    // (SplitCacheDirected) after function reordering.
    if (BC.HasFinalizedFunctionOrder)
-      Strategy = std::make_unique<SplitCacheDirected>();
+      Strategy = std::make_unique<SplitCacheDirected>(BC);
    else
      Strategy = std::make_unique<SplitProfile2>();
    opts::AggressiveSplitting = true;