Skip to content

Commit b0e2363

Browse files
committed
[BOLT] Add BB index to BAT
Add the input basic block index to BAT metadata. This addresses the case where some basic blocks are eliminated, so the output index is not equal to the input block index. These indices are used in non-stale-matching mode. Increases BAT section size to: - large binary: 39521512 bytes (1.02x original), - medium binary: 3799988 bytes (0.64x), - small binary: 920 bytes (0.64x). Test Plan: Updated bolt-address-translation{,-yaml}.test Pull Request: llvm#86044
1 parent f66d631 commit b0e2363

File tree

5 files changed

+43
-18
lines changed

5 files changed

+43
-18
lines changed

bolt/docs/BAT.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,12 @@ current function.
9090
### Address translation table
9191
Delta encoding means that only the difference with the previous corresponding
9292
entry is encoded. Input offsets implicitly start at zero.
93-
| Entry | Encoding | Description |
94-
| ------ | ------| ----------- |
95-
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
96-
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
97-
| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary |
93+
| Entry | Encoding | Description | Branch/BB |
94+
| ------ | ------| ----------- | ------ |
95+
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both |
96+
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both |
97+
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
98+
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |
9899

99100
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
100101
(branch or call instruction). If not set, it signifies a control flow target

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ class BoltAddressTranslation {
122122
/// Returns BF hash by function output address (after BOLT).
123123
size_t getBFHash(uint64_t OutputAddress) const;
124124

125+
/// Returns BB index by function output address (after BOLT) and basic block
126+
/// input offset.
127+
unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;
128+
125129
/// True if a given \p Address is a function with translation table entry.
126130
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }
127131

@@ -154,7 +158,8 @@ class BoltAddressTranslation {
154158

155159
std::map<uint64_t, MapTy> Maps;
156160

157-
using BBHashMap = std::unordered_map<uint32_t, size_t>;
161+
/// Map basic block input offset to a basic block index and hash pair.
162+
using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
158163
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
159164

160165
/// Links outlined cold blocks to their original function

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
4545
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
4646
getBBHash(HotFuncAddress, BBInputOffset)));
4747
(void)HotFuncAddress;
48+
LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
49+
getBBIndex(HotFuncAddress, BBInputOffset)));
4850
// In case of conflicts (same Key mapping to different Vals), the last
4951
// update takes precedence. Of course it is not ideal to have conflicts and
5052
// those happen when we have an empty BB that either contained only
@@ -217,6 +219,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
217219
}
218220
size_t Index = 0;
219221
uint64_t InOffset = 0;
222+
size_t PrevBBIndex = 0;
220223
// Output and Input addresses and delta-encoded
221224
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
222225
const uint64_t OutputAddress = KeyVal.first + Address;
@@ -226,11 +229,15 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
226229
encodeSLEB128(KeyVal.second - InOffset, OS);
227230
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
228231
if ((InOffset & BRANCHENTRY) == 0) {
229-
// Basic block hash
230-
size_t BBHash = FuncHashPair.second[InOffset >> 1];
232+
unsigned BBIndex;
233+
size_t BBHash;
234+
std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
231235
OS.write(reinterpret_cast<char *>(&BBHash), 8);
232-
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
233-
InOffset >> 1, BBHash));
236+
// Basic block index in the input binary
237+
encodeULEB128(BBIndex - PrevBBIndex, OS);
238+
PrevBBIndex = BBIndex;
239+
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
240+
InOffset >> 1, BBHash, BBIndex));
234241
}
235242
}
236243
}
@@ -316,6 +323,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
316323
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
317324
<< Twine::utohexstr(Address) << "\n");
318325
uint64_t InputOffset = 0;
326+
size_t BBIndex = 0;
319327
for (uint32_t J = 0; J < NumEntries; ++J) {
320328
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
321329
const uint64_t OutputAddress = PrevAddress + OutputDelta;
@@ -330,19 +338,25 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
330338
}
331339
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
332340
size_t BBHash = 0;
341+
size_t BBIndexDelta = 0;
333342
const bool IsBranchEntry = InputOffset & BRANCHENTRY;
334343
if (!IsBranchEntry) {
335344
BBHash = DE.getU64(&Offset, &Err);
345+
BBIndexDelta = DE.getULEB128(&Offset, &Err);
346+
BBIndex += BBIndexDelta;
336347
// Map basic block hash to hot fragment by input offset
337-
FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
348+
FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
349+
std::pair(BBIndex, BBHash));
338350
}
339351
LLVM_DEBUG({
340352
dbgs() << formatv(
341353
"{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
342354
InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
343355
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
344-
if (BBHash)
345-
dbgs() << formatv(" {0:x}", BBHash);
356+
if (!IsBranchEntry) {
357+
dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
358+
getULEB128Size(BBIndexDelta));
359+
}
346360
dbgs() << '\n';
347361
});
348362
}
@@ -494,14 +508,19 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
494508
FuncHashes[BF.getAddress()].first = BF.computeHash();
495509
BF.computeBlockHashes();
496510
for (const BinaryBasicBlock &BB : BF)
497-
FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
498-
BB.getHash());
511+
FuncHashes[BF.getAddress()].second.emplace(
512+
BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
499513
}
500514
}
501515

516+
unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
517+
uint32_t BBInputOffset) const {
518+
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
519+
}
520+
502521
size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
503522
uint32_t BBInputOffset) const {
504-
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset);
523+
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
505524
}
506525

507526
size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {

bolt/test/X86/bolt-address-translation-yaml.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s
1818

1919
WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
2020
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
21-
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 344
21+
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 376
2222

2323
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
2424
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries

bolt/test/X86/bolt-address-translation.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
# CHECK: BOLT: 3 out of 7 functions were overwritten.
3838
# CHECK: BOLT-INFO: Wrote 6 BAT maps
3939
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
40-
# CHECK: BOLT-INFO: BAT section size (bytes): 816
40+
# CHECK: BOLT-INFO: BAT section size (bytes): 920
4141
#
4242
# usqrt mappings (hot part). We match against any key (left side containing
4343
# the bolted binary offsets) because BOLT may change where it puts instructions

0 commit comments

Comments
 (0)