Skip to content

Commit 3b3de48

Browse files
authored
[BOLT] Add BB index to BAT (llvm#86044)
1 parent 31a9a4b commit 3b3de48

File tree

12 files changed

+92
-77
lines changed

12 files changed

+92
-77
lines changed

bolt/docs/BAT.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,12 @@ current function.
9090
### Address translation table
9191
Delta encoding means that only the difference with the previous corresponding
9292
entry is encoded. Input offsets implicitly start at zero.
93-
| Entry | Encoding | Description |
94-
| ------ | ------| ----------- |
95-
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
96-
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
97-
| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary |
93+
| Entry | Encoding | Description | Branch/BB |
94+
| ------ | ------| ----------- | ------ |
95+
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both |
96+
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both |
97+
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
98+
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |
9899

99100
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
100101
(branch or call instruction). If not set, it signifies a control flow target

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ class BoltAddressTranslation {
122122
/// Returns BF hash by function output address (after BOLT).
123123
size_t getBFHash(uint64_t OutputAddress) const;
124124

125+
/// Returns BB index by function output address (after BOLT) and basic block
126+
/// input offset.
127+
unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;
128+
125129
/// True if a given \p Address is a function with translation table entry.
126130
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }
127131

@@ -154,7 +158,8 @@ class BoltAddressTranslation {
154158

155159
std::map<uint64_t, MapTy> Maps;
156160

157-
using BBHashMap = std::unordered_map<uint32_t, size_t>;
161+
/// Map basic block input offset to a basic block index and hash pair.
162+
using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
158163
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
159164

160165
/// Links outlined cold blocks to their original function

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
4545
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
4646
getBBHash(HotFuncAddress, BBInputOffset)));
4747
(void)HotFuncAddress;
48+
LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
49+
getBBIndex(HotFuncAddress, BBInputOffset)));
4850
// In case of conflicts (same Key mapping to different Vals), the last
4951
// update takes precedence. Of course it is not ideal to have conflicts and
5052
// those happen when we have an empty BB that either contained only
@@ -217,6 +219,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
217219
}
218220
size_t Index = 0;
219221
uint64_t InOffset = 0;
222+
size_t PrevBBIndex = 0;
220223
// Output and Input addresses are delta-encoded
221224
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
222225
const uint64_t OutputAddress = KeyVal.first + Address;
@@ -226,11 +229,15 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
226229
encodeSLEB128(KeyVal.second - InOffset, OS);
227230
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
228231
if ((InOffset & BRANCHENTRY) == 0) {
229-
// Basic block hash
230-
size_t BBHash = FuncHashPair.second[InOffset >> 1];
232+
unsigned BBIndex;
233+
size_t BBHash;
234+
std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
231235
OS.write(reinterpret_cast<char *>(&BBHash), 8);
232-
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
233-
InOffset >> 1, BBHash));
236+
// Basic block index in the input binary
237+
encodeULEB128(BBIndex - PrevBBIndex, OS);
238+
PrevBBIndex = BBIndex;
239+
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
240+
InOffset >> 1, BBHash, BBIndex));
234241
}
235242
}
236243
}
@@ -316,6 +323,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
316323
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
317324
<< Twine::utohexstr(Address) << "\n");
318325
uint64_t InputOffset = 0;
326+
size_t BBIndex = 0;
319327
for (uint32_t J = 0; J < NumEntries; ++J) {
320328
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
321329
const uint64_t OutputAddress = PrevAddress + OutputDelta;
@@ -330,19 +338,25 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
330338
}
331339
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
332340
size_t BBHash = 0;
341+
size_t BBIndexDelta = 0;
333342
const bool IsBranchEntry = InputOffset & BRANCHENTRY;
334343
if (!IsBranchEntry) {
335344
BBHash = DE.getU64(&Offset, &Err);
345+
BBIndexDelta = DE.getULEB128(&Offset, &Err);
346+
BBIndex += BBIndexDelta;
336347
// Map basic block hash to hot fragment by input offset
337-
FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
348+
FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
349+
std::pair(BBIndex, BBHash));
338350
}
339351
LLVM_DEBUG({
340352
dbgs() << formatv(
341353
"{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
342354
InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
343355
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
344-
if (BBHash)
345-
dbgs() << formatv(" {0:x}", BBHash);
356+
if (!IsBranchEntry) {
357+
dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
358+
getULEB128Size(BBIndexDelta));
359+
}
346360
dbgs() << '\n';
347361
});
348362
}
@@ -494,14 +508,19 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
494508
FuncHashes[BF.getAddress()].first = BF.computeHash();
495509
BF.computeBlockHashes();
496510
for (const BinaryBasicBlock &BB : BF)
497-
FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
498-
BB.getHash());
511+
FuncHashes[BF.getAddress()].second.emplace(
512+
BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
499513
}
500514
}
501515

516+
unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
517+
uint32_t BBInputOffset) const {
518+
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
519+
}
520+
502521
size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
503522
uint32_t BBInputOffset) const {
504-
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset);
523+
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
505524
}
506525

507526
size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {

bolt/test/X86/bolt-address-translation-yaml.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s
1818

1919
WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
2020
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
21-
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 344
21+
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 376
2222

2323
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
2424
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries

bolt/test/X86/bolt-address-translation.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
# CHECK: BOLT: 3 out of 7 functions were overwritten.
3838
# CHECK: BOLT-INFO: Wrote 6 BAT maps
3939
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
40-
# CHECK: BOLT-INFO: BAT section size (bytes): 816
40+
# CHECK: BOLT-INFO: BAT section size (bytes): 920
4141
#
4242
# usqrt mappings (hot part). We match against any key (left side containing
4343
# the bolted binary offsets) because BOLT may change where it puts instructions

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5863,8 +5863,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
58635863
} else if (Triple.getArch() == llvm::Triple::x86_64) {
58645864
Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
58655865
CM);
5866-
} else if (Triple.isNVPTX() || Triple.isAMDGPU() || Triple.isSPIRV()) {
5867-
// NVPTX/AMDGPU/SPIRV does not care about the code model and will accept
5866+
} else if (Triple.isNVPTX() || Triple.isAMDGPU()) {
5867+
// NVPTX/AMDGPU does not care about the code model and will accept
58685868
// whatever works for the host.
58695869
Ok = true;
58705870
} else if (Triple.isSPARC64()) {

clang/test/Driver/unsupported-option-gpu.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,4 @@
22
// DEFINE: %{check} = %clang -### --target=x86_64-linux-gnu -c -mcmodel=medium
33

44
// RUN: %{check} -x cuda %s --cuda-path=%S/Inputs/CUDA/usr/local/cuda --offload-arch=sm_60 --no-cuda-version-check -fbasic-block-sections=all
5-
// RUN: %{check} -x hip %s --offload=spirv64 -nogpulib -nogpuinc
65
// RUN: %{check} -x hip %s --rocm-path=%S/Inputs/rocm -nogpulib -nogpuinc

lld/MachO/Driver.cpp

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ static void replaceCommonSymbols() {
612612
if (!osec)
613613
osec = ConcatOutputSection::getOrCreateForInput(isec);
614614
isec->parent = osec;
615-
addInputSection(isec);
615+
inputSections.push_back(isec);
616616

617617
// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
618618
// and pass them on here.
@@ -1220,18 +1220,53 @@ static void createFiles(const InputArgList &args) {
12201220

12211221
static void gatherInputSections() {
12221222
TimeTraceScope timeScope("Gathering input sections");
1223+
int inputOrder = 0;
12231224
for (const InputFile *file : inputFiles) {
12241225
for (const Section *section : file->sections) {
12251226
// Compact unwind entries require special handling elsewhere. (In
12261227
// contrast, EH frames are handled like regular ConcatInputSections.)
12271228
if (section->name == section_names::compactUnwind)
12281229
continue;
1229-
for (const Subsection &subsection : section->subsections)
1230-
addInputSection(subsection.isec);
1230+
ConcatOutputSection *osec = nullptr;
1231+
for (const Subsection &subsection : section->subsections) {
1232+
if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
1233+
if (isec->isCoalescedWeak())
1234+
continue;
1235+
if (config->emitInitOffsets &&
1236+
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
1237+
in.initOffsets->addInput(isec);
1238+
continue;
1239+
}
1240+
isec->outSecOff = inputOrder++;
1241+
if (!osec)
1242+
osec = ConcatOutputSection::getOrCreateForInput(isec);
1243+
isec->parent = osec;
1244+
inputSections.push_back(isec);
1245+
} else if (auto *isec =
1246+
dyn_cast<CStringInputSection>(subsection.isec)) {
1247+
if (isec->getName() == section_names::objcMethname) {
1248+
if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
1249+
in.objcMethnameSection->inputOrder = inputOrder++;
1250+
in.objcMethnameSection->addInput(isec);
1251+
} else {
1252+
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
1253+
in.cStringSection->inputOrder = inputOrder++;
1254+
in.cStringSection->addInput(isec);
1255+
}
1256+
} else if (auto *isec =
1257+
dyn_cast<WordLiteralInputSection>(subsection.isec)) {
1258+
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
1259+
in.wordLiteralSection->inputOrder = inputOrder++;
1260+
in.wordLiteralSection->addInput(isec);
1261+
} else {
1262+
llvm_unreachable("unexpected input section kind");
1263+
}
1264+
}
12311265
}
12321266
if (!file->objCImageInfo.empty())
12331267
in.objCImageInfo->addFile(file);
12341268
}
1269+
assert(inputOrder <= UnspecifiedInputOrder);
12351270
}
12361271

12371272
static void foldIdenticalLiterals() {
@@ -1387,7 +1422,6 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
13871422
concatOutputSections.clear();
13881423
inputFiles.clear();
13891424
inputSections.clear();
1390-
inputSectionsOrder = 0;
13911425
loadedArchives.clear();
13921426
loadedObjectFrameworks.clear();
13931427
missingAutolinkWarnings.clear();

lld/MachO/InputSection.cpp

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -37,44 +37,6 @@ static_assert(sizeof(void *) != 8 ||
3737
"instances of it");
3838

3939
std::vector<ConcatInputSection *> macho::inputSections;
40-
int macho::inputSectionsOrder = 0;
41-
42-
// Call this function to add a new InputSection and have it routed to the
43-
// appropriate container. Depending on its type and current config, it will
44-
// either be added to 'inputSections' vector or to a synthetic section.
45-
void lld::macho::addInputSection(InputSection *inputSection) {
46-
if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) {
47-
if (isec->isCoalescedWeak())
48-
return;
49-
if (config->emitInitOffsets &&
50-
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
51-
in.initOffsets->addInput(isec);
52-
return;
53-
}
54-
isec->outSecOff = inputSectionsOrder++;
55-
auto *osec = ConcatOutputSection::getOrCreateForInput(isec);
56-
isec->parent = osec;
57-
inputSections.push_back(isec);
58-
} else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {
59-
if (isec->getName() == section_names::objcMethname) {
60-
if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
61-
in.objcMethnameSection->inputOrder = inputSectionsOrder++;
62-
in.objcMethnameSection->addInput(isec);
63-
} else {
64-
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
65-
in.cStringSection->inputOrder = inputSectionsOrder++;
66-
in.cStringSection->addInput(isec);
67-
}
68-
} else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
69-
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
70-
in.wordLiteralSection->inputOrder = inputSectionsOrder++;
71-
in.wordLiteralSection->addInput(isec);
72-
} else {
73-
llvm_unreachable("unexpected input section kind");
74-
}
75-
76-
assert(inputSectionsOrder <= UnspecifiedInputOrder);
77-
}
7840

7941
uint64_t InputSection::getFileSize() const {
8042
return isZeroFill(getFlags()) ? 0 : getSize();

lld/MachO/InputSection.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,6 @@ bool isEhFrameSection(const InputSection *);
302302
bool isGccExceptTabSection(const InputSection *);
303303

304304
extern std::vector<ConcatInputSection *> inputSections;
305-
// This is used as a counter for specifying input order for input sections
306-
extern int inputSectionsOrder;
307305

308306
namespace section_names {
309307

@@ -371,7 +369,6 @@ constexpr const char addrSig[] = "__llvm_addrsig";
371369

372370
} // namespace section_names
373371

374-
void addInputSection(InputSection *inputSection);
375372
} // namespace macho
376373

377374
std::string toString(const macho::InputSection *);

lld/MachO/ObjC.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,7 @@ void ObjcCategoryMerger::emitAndLinkProtocolList(
790790
infoCategoryWriter.catPtrListInfo.align);
791791
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
792792
listSec->live = true;
793-
addInputSection(listSec);
793+
allInputSections.push_back(listSec);
794794

795795
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
796796

@@ -848,7 +848,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
848848
infoCategoryWriter.catPtrListInfo.align);
849849
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
850850
listSec->live = true;
851-
addInputSection(listSec);
851+
allInputSections.push_back(listSec);
852852

853853
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
854854

@@ -889,7 +889,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName,
889889
bodyData, infoCategoryWriter.catListInfo.align);
890890
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
891891
newCatList->live = true;
892-
addInputSection(newCatList);
892+
allInputSections.push_back(newCatList);
893893

894894
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
895895

@@ -927,7 +927,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
927927
bodyData, infoCategoryWriter.catBodyInfo.align);
928928
newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
929929
newBodySec->live = true;
930-
addInputSection(newBodySec);
930+
allInputSections.push_back(newBodySec);
931931

932932
std::string symName =
933933
objc::symbol_names::category + baseClassName + "_$_(" + name + ")";
@@ -1132,7 +1132,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
11321132
infoCategoryWriter.catListInfo.align);
11331133
listSec->parent = infoCategoryWriter.catListInfo.outputSection;
11341134
listSec->live = true;
1135-
addInputSection(listSec);
1135+
allInputSections.push_back(listSec);
11361136

11371137
std::string slotSymName = "<__objc_catlist slot for category ";
11381138
slotSymName += nonErasedCatBody->getName();
@@ -1221,11 +1221,9 @@ void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
12211221

12221222
StringRef ObjcCategoryMerger::newStringData(const char *str) {
12231223
uint32_t len = strlen(str);
1224-
uint32_t bufSize = len + 1;
1225-
auto &data = newSectionData(bufSize);
1224+
auto &data = newSectionData(len + 1);
12261225
char *strData = reinterpret_cast<char *>(data.data());
1227-
// Copy the string chars and null-terminator
1228-
memcpy(strData, str, bufSize);
1226+
strncpy(strData, str, len);
12291227
return StringRef(strData, len);
12301228
}
12311229

lld/MachO/SyntheticSections.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -793,7 +793,7 @@ void StubHelperSection::setUp() {
793793

794794
in.imageLoaderCache->parent =
795795
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
796-
addInputSection(in.imageLoaderCache);
796+
inputSections.push_back(in.imageLoaderCache);
797797
// Since this isn't in the symbol table or in any input file, the noDeadStrip
798798
// argument doesn't matter.
799799
dyldPrivate =
@@ -855,7 +855,7 @@ ConcatInputSection *ObjCSelRefsSection::makeSelRef(StringRef methname) {
855855
/*addend=*/static_cast<int64_t>(methnameOffset),
856856
/*referent=*/in.objcMethnameSection->isec});
857857
objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
858-
addInputSection(objcSelref);
858+
inputSections.push_back(objcSelref);
859859
objcSelref->isFinal = true;
860860
methnameToSelref[CachedHashStringRef(methname)] = objcSelref;
861861
return objcSelref;

0 commit comments

Comments
 (0)