Skip to content

Commit 5e097c7

Browse files
authored
[BOLT] Hash-based function matching (llvm#95821)
Use the hashes of binary functions and profiled functions to match profiles to functions whose names have changed. Test Plan: added hashing-based-function-matching.test.
1 parent a3a44bf commit 5e097c7

File tree

5 files changed

+135
-10
lines changed

5 files changed

+135
-10
lines changed

bolt/docs/CommandLineArgumentReference.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,10 @@
259259

260260
Always use long jumps/nops for Linux kernel static keys
261261

262+
- `--match-profile-with-function-hash`
263+
264+
Match profile with function hash
265+
262266
- `--max-data-relocations=<uint>`
263267

264268
Maximum number of data relocations to process

bolt/lib/Profile/YAMLProfileReader.cpp

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ namespace opts {
2222
extern cl::opt<unsigned> Verbosity;
2323
extern cl::OptionCategory BoltOptCategory;
2424
extern cl::opt<bool> InferStaleProfile;
25+
extern cl::opt<bool> MatchProfileWithFunctionHash;
26+
extern cl::opt<bool> Lite;
2527

2628
static llvm::cl::opt<bool>
2729
IgnoreHash("profile-ignore-hash",
@@ -363,9 +365,19 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
363365
return Profile.Hash == static_cast<uint64_t>(BF.getHash());
364366
};
365367

366-
// We have to do 2 passes since LTO introduces an ambiguity in function
367-
// names. The first pass assigns profiles that match 100% by name and
368-
// by hash. The second pass allows name ambiguity for LTO private functions.
368+
uint64_t MatchedWithExactName = 0;
369+
uint64_t MatchedWithHash = 0;
370+
uint64_t MatchedWithLTOCommonName = 0;
371+
372+
// Computes hash for binary functions.
373+
if (opts::MatchProfileWithFunctionHash)
374+
for (auto &[_, BF] : BC.getBinaryFunctions())
375+
BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
376+
else if (!opts::IgnoreHash)
377+
for (BinaryFunction *BF : ProfileBFs)
378+
BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
379+
380+
// This first pass assigns profiles that match 100% by name and by hash.
369381
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
370382
if (!BF)
371383
continue;
@@ -374,15 +386,34 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
374386
// the profile.
375387
Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
376388

377-
// Recompute hash once per function.
378-
if (!opts::IgnoreHash)
379-
Function.computeHash(YamlBP.Header.IsDFSOrder,
380-
YamlBP.Header.HashFunction);
381-
382-
if (profileMatches(YamlBF, Function))
389+
if (profileMatches(YamlBF, Function)) {
383390
matchProfileToFunction(YamlBF, Function);
391+
++MatchedWithExactName;
392+
}
384393
}
385394

395+
// Uses the strict hash of profiled and binary functions to match profiles
396+
// that were not matched by exact name in the first pass.
397+
if (opts::MatchProfileWithFunctionHash) {
398+
std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
399+
StrictHashToBF.reserve(BC.getBinaryFunctions().size());
400+
401+
for (auto &[_, BF] : BC.getBinaryFunctions())
402+
StrictHashToBF[BF.getHash()] = &BF;
403+
404+
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
405+
if (YamlBF.Used)
406+
continue;
407+
auto It = StrictHashToBF.find(YamlBF.Hash);
408+
if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
409+
BinaryFunction *BF = It->second;
410+
matchProfileToFunction(YamlBF, *BF);
411+
++MatchedWithHash;
412+
}
413+
}
414+
}
415+
416+
// This second pass allows name ambiguity for LTO private functions.
386417
for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
387418
if (!LTOCommonNameFunctionMap.contains(CommonName))
388419
continue;
@@ -396,6 +427,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
396427
for (BinaryFunction *BF : Functions) {
397428
if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
398429
matchProfileToFunction(*YamlBF, *BF);
430+
++MatchedWithLTOCommonName;
399431
return true;
400432
}
401433
}
@@ -407,8 +439,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
407439
// partially.
408440
if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
409441
!LTOProfiles.front()->Used &&
410-
!ProfiledFunctions.count(*Functions.begin()))
442+
!ProfiledFunctions.count(*Functions.begin())) {
411443
matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
444+
++MatchedWithLTOCommonName;
445+
}
412446
}
413447

414448
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
@@ -420,6 +454,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
420454
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
421455
<< '\n';
422456

457+
if (opts::Verbosity >= 2) {
458+
outs() << "BOLT-INFO: matched " << MatchedWithExactName
459+
<< " functions with identical names\n";
460+
outs() << "BOLT-INFO: matched " << MatchedWithHash
461+
<< " functions with hash\n";
462+
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
463+
<< " functions with matching LTO common names\n";
464+
}
465+
423466
// Set for parseFunctionProfile().
424467
NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
425468
NormalizeByCalls = usesEvent("branches");
@@ -439,6 +482,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
439482

440483
BC.setNumUnusedProfiledObjects(NumUnused);
441484

485+
if (opts::Lite)
486+
for (BinaryFunction *BF : BC.getAllBinaryFunctions())
487+
if (!BF->hasProfile())
488+
BF->setIgnored();
489+
442490
return Error::success();
443491
}
444492

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ extern cl::opt<bool> Hugify;
8282
extern cl::opt<bool> Instrument;
8383
extern cl::opt<JumpTableSupportLevel> JumpTables;
8484
extern cl::opt<bool> KeepNops;
85+
extern cl::opt<bool> MatchProfileWithFunctionHash;
8586
extern cl::list<std::string> ReorderData;
8687
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
8788
extern cl::opt<bool> TerminalTrap;
@@ -2982,6 +2983,9 @@ void RewriteInstance::selectFunctionsToProcess() {
29822983
if (mustSkip(Function))
29832984
return false;
29842985

2986+
if (opts::MatchProfileWithFunctionHash)
2987+
return true;
2988+
29852989
// If the list is not empty, only process functions from the list.
29862990
if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
29872991
// Regex check (-funcs and -funcs-file options).

bolt/lib/Utils/CommandLineOpts.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ cl::opt<bool>
128128
cl::desc("instrument code to generate accurate profile data"),
129129
cl::cat(BoltOptCategory));
130130

131+
cl::opt<bool>
132+
MatchProfileWithFunctionHash("match-profile-with-function-hash",
133+
cl::desc("Match profile with function hash"),
134+
cl::Hidden, cl::cat(BoltCategory));
135+
131136
cl::opt<std::string>
132137
OutputFilename("o",
133138
cl::desc("<output file>"),
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
## Tests function matching in YAMLProfileReader by function hash.
2+
3+
# REQUIRES: system-linux
4+
# RUN: split-file %s %t
5+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
6+
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
7+
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
8+
# RUN: --print-cfg --match-profile-with-function-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s
9+
10+
# CHECK: BOLT-INFO: matched 1 functions with hash
11+
12+
#--- main.s
13+
.globl main
14+
.type main, @function
15+
main:
16+
.cfi_startproc
17+
.LBB00:
18+
pushq %rbp
19+
movq %rsp, %rbp
20+
subq $16, %rsp
21+
testq %rax, %rax
22+
js .LBB03
23+
.LBB01:
24+
jne .LBB04
25+
.LBB02:
26+
nop
27+
.LBB03:
28+
xorl %eax, %eax
29+
addq $16, %rsp
30+
popq %rbp
31+
retq
32+
.LBB04:
33+
xorl %eax, %eax
34+
addq $16, %rsp
35+
popq %rbp
36+
retq
37+
## For relocations against .text
38+
.LBB05:
39+
call exit
40+
.cfi_endproc
41+
.size main, .-main
42+
43+
#--- yaml
44+
---
45+
header:
46+
profile-version: 1
47+
binary-name: 'hashing-based-function-matching.s.tmp.exe'
48+
binary-build-id: '<unknown>'
49+
profile-flags: [ lbr ]
50+
profile-origin: branch profile reader
51+
profile-events: ''
52+
dfs-order: false
53+
hash-func: xxh3
54+
functions:
55+
- name: main2
56+
fid: 0
57+
hash: 0x72F82DEAA6FE65FB
58+
exec: 1
59+
nblocks: 6
60+
blocks:
61+
- bid: 1
62+
insns: 1
63+
succ: [ { bid: 3, cnt: 1} ]
64+
...

0 commit comments

Comments
 (0)