Skip to content

Commit 96b5e09

Browse files
authored
[BOLT] Support instrumentation hook via DT_FINI_ARRAY (llvm#67348)
BOLT currently hooks its instrumentation finalization function via `DT_FINI`. However, this method of calling finalization routines is not supported anymore on newer ABIs like RISC-V. `DT_FINI_ARRAY` is preferred there. This patch adds support for hooking into `DT_FINI_ARRAY` instead if the binary does not have a `DT_FINI` entry. If it does, `DT_FINI` takes precedence so this patch should not change how the currently supported instrumentation targets behave. `DT_FINI_ARRAY` points to an array in memory of `DT_FINI_ARRAYSZ` bytes. It consists of pointer-length entries that contain the addresses of finalization functions. However, the addresses are only filled in by the dynamic linker at load time using relative relocations. This makes hooking via `DT_FINI_ARRAY` a bit more complicated than via `DT_FINI`. The implementation works as follows: - While scanning the binary: find the section that `DT_FINI_ARRAY` points to, read its first dynamic relocation and use its addend to find the address of the fini function we will use to hook; - While writing the output file: overwrite the addend of the dynamic relocation with the address of the runtime library's fini function. Updating the dynamic relocation required a bit of boilerplate: since dynamic relocations are stored in a `std::multiset` which doesn't support getting mutable references to its items, functions were added to `BinarySection` to take an existing relocation and insert a new one.
1 parent 8d72079 commit 96b5e09

File tree

8 files changed

+287
-10
lines changed

8 files changed

+287
-10
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,15 @@ class BinaryContext {
680680
/// the execution of the binary is completed.
681681
std::optional<uint64_t> FiniFunctionAddress;
682682

683+
/// DT_FINI.
684+
std::optional<uint64_t> FiniAddress;
685+
686+
/// DT_FINI_ARRAY. Only used when DT_FINI is not set.
687+
std::optional<uint64_t> FiniArrayAddress;
688+
689+
/// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
690+
std::optional<uint64_t> FiniArraySize;
691+
683692
/// Page alignment used for code layout.
684693
uint64_t PageAlign{HugePageSize};
685694

bolt/include/bolt/Core/BinarySection.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -375,8 +375,12 @@ class BinarySection {
375375
/// Add a dynamic relocation at the given \p Offset.
376376
void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
377377
uint64_t Addend, uint64_t Value = 0) {
378-
assert(Offset < getSize() && "offset not within section bounds");
379-
DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
378+
addDynamicRelocation(Relocation{Offset, Symbol, Type, Addend, Value});
379+
}
380+
381+
/// Add an already-constructed dynamic relocation \p Reloc to this section.
/// Asserts that the relocation's offset lies within the section's size.
void addDynamicRelocation(const Relocation &Reloc) {
382+
assert(Reloc.Offset < getSize() && "offset not within section bounds");
383+
DynamicRelocations.emplace(Reloc);
380384
}
381385

382386
/// Add relocation against the original contents of this section.
@@ -410,6 +414,18 @@ class BinarySection {
410414
return Itr != DynamicRelocations.end() ? &*Itr : nullptr;
411415
}
412416

417+
/// Remove the dynamic relocation found for \p Offset from this section and
/// return a copy of it. Returns std::nullopt, leaving the container
/// untouched, when the lookup finds no match.
std::optional<Relocation> takeDynamicRelocationAt(uint64_t Offset) {
418+
// Lookup key with only the offset populated; every other field is zero.
// NOTE(review): presumably the container's ordering compares relocations by
// offset only -- confirm against Relocation's comparison operator.
Relocation Key{Offset, 0, 0, 0, 0};
419+
auto Itr = DynamicRelocations.find(Key);
420+
421+
if (Itr == DynamicRelocations.end())
422+
return std::nullopt;
423+
424+
// Copy the element out before erasing it, since erase() invalidates Itr.
Relocation Reloc = *Itr;
425+
DynamicRelocations.erase(Itr);
426+
return Reloc;
427+
}
428+
413429
uint64_t hash(const BinaryData &BD) const {
414430
std::map<const BinaryData *, uint64_t> Cache;
415431
return hash(BD, Cache);

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,15 @@ class RewriteInstance {
9595
/// from meta data in the file.
9696
void discoverFileObjects();
9797

98+
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
99+
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
100+
/// found.
101+
Error discoverRtFiniAddress();
102+
103+
/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
104+
/// first entry to point to the instrumentation library's fini address.
105+
void updateRtFiniReloc();
106+
98107
/// Create and initialize metadata rewriters for this instance.
99108
void initializeMetadataManager();
100109

bolt/lib/Core/Relocation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,9 @@ static uint64_t encodeValueAArch64(uint64_t Type, uint64_t Value, uint64_t PC) {
365365
switch (Type) {
366366
default:
367367
llvm_unreachable("unsupported relocation");
368+
case ELF::R_AARCH64_ABS16:
368369
case ELF::R_AARCH64_ABS32:
370+
case ELF::R_AARCH64_ABS64:
369371
break;
370372
case ELF::R_AARCH64_PREL16:
371373
case ELF::R_AARCH64_PREL32:

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,10 @@ Error RewriteInstance::run() {
704704
adjustCommandLineOptions();
705705
discoverFileObjects();
706706

707+
if (opts::Instrument && !BC->IsStaticExecutable)
708+
if (Error E = discoverRtFiniAddress())
709+
return E;
710+
707711
preprocessProfileData();
708712

709713
// Skip disassembling if we have a translation table and we are running an
@@ -740,6 +744,9 @@ Error RewriteInstance::run() {
740744

741745
updateMetadata();
742746

747+
if (opts::Instrument && !BC->IsStaticExecutable)
748+
updateRtFiniReloc();
749+
743750
if (opts::LinuxKernelMode) {
744751
errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
745752
return Error::success();
@@ -1280,6 +1287,77 @@ void RewriteInstance::discoverFileObjects() {
12801287
registerFragments();
12811288
}
12821289

1290+
/// Determine the address of the binary's finalization function and store it
/// in BC->FiniFunctionAddress.
///
/// DT_FINI (BC->FiniAddress) is used when present. Otherwise the function
/// falls back to DT_FINI_ARRAY and takes the address from the relocation at
/// offset 0 of the section containing BC->FiniArrayAddress: the addend of a
/// dynamic relocation if one exists, else the value of a static relocation.
///
/// \returns Error::success() once FiniFunctionAddress is set; a
/// not_supported error when neither DT_FINI nor a usable DT_FINI_ARRAY is
/// available, when the array is smaller than one code pointer, or when no
/// relocation exists at offset 0; or the section lookup's error code.
Error RewriteInstance::discoverRtFiniAddress() {
1291+
// Use DT_FINI if it's available.
1292+
if (BC->FiniAddress) {
1293+
BC->FiniFunctionAddress = BC->FiniAddress;
1294+
return Error::success();
1295+
}
1296+
1297+
// Both the array address and its size are required for the fallback.
if (!BC->FiniArrayAddress || !BC->FiniArraySize) {
1298+
return createStringError(
1299+
std::errc::not_supported,
1300+
"Instrumentation needs either DT_FINI or DT_FINI_ARRAY");
1301+
}
1302+
1303+
// The array must be large enough to hold at least one code pointer.
if (*BC->FiniArraySize < BC->AsmInfo->getCodePointerSize()) {
1304+
return createStringError(std::errc::not_supported,
1305+
"Need at least 1 DT_FINI_ARRAY slot");
1306+
}
1307+
1308+
ErrorOr<BinarySection &> FiniArraySection =
1309+
BC->getSectionForAddress(*BC->FiniArrayAddress);
1310+
if (auto EC = FiniArraySection.getError())
1311+
return errorCodeToError(EC);
1312+
1313+
// Prefer a dynamic relocation; its addend is taken as the fini address.
// NOTE(review): offset 0 assumes DT_FINI_ARRAY points at the very start of
// its containing section -- confirm, or derive the offset from
// FiniArrayAddress minus the section address.
if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) {
1314+
BC->FiniFunctionAddress = Reloc->Addend;
1315+
return Error::success();
1316+
}
1317+
1318+
// Otherwise fall back to a static relocation and use its value.
if (const Relocation *Reloc = FiniArraySection->getRelocationAt(0)) {
1319+
BC->FiniFunctionAddress = Reloc->Value;
1320+
return Error::success();
1321+
}
1322+
1323+
return createStringError(std::errc::not_supported,
1324+
"No relocation for first DT_FINI_ARRAY slot");
1325+
}
1326+
1327+
/// Redirect the relocation at offset 0 of the DT_FINI_ARRAY section to the
/// runtime library's fini address.
///
/// Does nothing when DT_FINI is present, when there is no runtime library, or
/// when the runtime library reports a zero fini address. Otherwise it
/// rewrites the addend of the dynamic relocation at offset 0 (if one exists)
/// and always queues a pending absolute 64-bit relocation carrying the same
/// address.
void RewriteInstance::updateRtFiniReloc() {
1328+
// Updating DT_FINI is handled by patchELFDynamic.
1329+
if (BC->FiniAddress)
1330+
return;
1331+
1332+
const RuntimeLibrary *RT = BC->getRuntimeLibrary();
1333+
if (!RT || !RT->getRuntimeFiniAddress())
1334+
return;
1335+
1336+
assert(BC->FiniArrayAddress && BC->FiniArraySize &&
1337+
"inconsistent .fini_array state");
1338+
1339+
ErrorOr<BinarySection &> FiniArraySection =
1340+
BC->getSectionForAddress(*BC->FiniArrayAddress);
1341+
assert(FiniArraySection && ".fini_array removed");
1342+
1343+
// The relocation is removed, modified and re-inserted rather than edited in
// place.
// NOTE(review): offset 0 assumes DT_FINI_ARRAY points at the start of its
// containing section -- confirm.
if (std::optional<Relocation> Reloc =
1344+
FiniArraySection->takeDynamicRelocationAt(0)) {
1345+
assert(Reloc->Addend == BC->FiniFunctionAddress &&
1346+
"inconsistent .fini_array dynamic relocation");
1347+
Reloc->Addend = RT->getRuntimeFiniAddress();
1348+
FiniArraySection->addDynamicRelocation(*Reloc);
1349+
}
1350+
1351+
// Update the static relocation by adding a pending relocation which will get
1352+
// patched when flushPendingRelocations is called in rewriteFile. Note that
1353+
// flushPendingRelocations will calculate the value to patch as
1354+
// "Symbol + Addend". Since we don't have a symbol, just set the addend to the
1355+
// desired value.
// This pending relocation is added whether or not a dynamic relocation was
// found above.
1356+
FiniArraySection->addPendingRelocation(Relocation{
1357+
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
1358+
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
1359+
}
1360+
12831361
void RewriteInstance::registerFragments() {
12841362
if (!BC->HasSplitFunctions)
12851363
return;
@@ -5135,7 +5213,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
51355213
}
51365214
break;
51375215
case ELF::DT_FINI:
5138-
BC->FiniFunctionAddress = Dyn.getPtr();
5216+
BC->FiniAddress = Dyn.getPtr();
5217+
break;
5218+
case ELF::DT_FINI_ARRAY:
5219+
BC->FiniArrayAddress = Dyn.getPtr();
5220+
break;
5221+
case ELF::DT_FINI_ARRAYSZ:
5222+
BC->FiniArraySize = Dyn.getPtr();
51395223
break;
51405224
case ELF::DT_RELA:
51415225
DynamicRelocationsAddress = Dyn.getPtr();

bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
6565
exit(1);
6666
}
6767

68-
if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
69-
errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
70-
"section but instrumentation currently relies on patching "
71-
"DT_FINI to write the profile\n";
72-
exit(1);
73-
}
74-
7568
if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) &&
7669
opts::InstrumentationFileAppendPID) {
7770
errs()

bolt/test/AArch64/hook-fini.s

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
## Test the different ways of hooking the fini function for instrumentation (via
2+
## DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE
3+
## binaries because of the different ways of handling relocations (static or
4+
## dynamic).
5+
## All tests perform the following steps:
6+
## - Compile and link for the case to be tested
7+
## - Some sanity-checks on the dynamic section and relocations in the binary to
8+
## verify it has the shape we want for testing:
9+
## - DT_FINI or DT_FINI_ARRAY in dynamic section
10+
## - No relative relocations for non-PIE
11+
## - Instrument
12+
## - Verify generated binary
13+
# REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}}
14+
15+
# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe
16+
# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s
17+
# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
18+
# RUN: llvm-bolt %t.exe -o %t --instrument
19+
# RUN: llvm-readelf -drs %t | FileCheck --check-prefix=CHECK-FINI %s
20+
21+
# RUN: %clang %cflags -pie %s -Wl,-q,-fini=0 -o %t-no-fini.exe
22+
# RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
23+
# RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s
24+
# RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument
25+
# RUN: llvm-readelf -drs %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI %s
26+
# RUN: llvm-readelf -ds -x .fini_array %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI-RELOC %s
27+
28+
## Create a dummy shared library to link against to force creation of the dynamic section.
29+
# RUN: %clang %cflags %p/../Inputs/stub.c -fPIC -shared -o %t-stub.so
30+
# RUN: %clang %cflags %s -no-pie -Wl,-q,-fini=0 %t-stub.so -o %t-no-pie-no-fini.exe
31+
# RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
32+
# RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument
33+
# RUN: llvm-readelf -ds -x .fini_array %t-no-pie-no-fini | FileCheck --check-prefix=CHECK-NO-PIE-NO-FINI %s
34+
35+
## With fini: dynamic section should contain DT_FINI
36+
# DYN-FINI: (FINI)
37+
38+
## Without fini: dynamic section should only contain DT_FINI_ARRAY
39+
# DYN-NO-FINI-NOT: (FINI)
40+
# DYN-NO-FINI: (FINI_ARRAY)
41+
# DYN-NO-FINI: (FINI_ARRAYSZ)
42+
43+
## With PIE: binary should have relative relocations
44+
# RELOC-PIE: R_AARCH64_RELATIVE
45+
46+
## Without PIE: binary should not have relative relocations
47+
# RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE
48+
49+
## Check that DT_FINI is set to __bolt_runtime_fini
50+
# CHECK-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
51+
# CHECK-FINI-DAG: (FINI) 0x[[FINI:[[:xdigit:]]+]]
52+
# CHECK-FINI-DAG: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
53+
## Check that the dynamic relocation at .fini_array was not patched
54+
# CHECK-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
55+
# CHECK-FINI-NOT: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI]]
56+
# CHECK-FINI: Symbol table '.symtab' contains {{.*}} entries:
57+
# CHECK-FINI: {{0+}}[[FINI]] {{.*}} __bolt_runtime_fini
58+
59+
## Check that DT_FINI_ARRAY has a dynamic relocation for __bolt_runtime_fini
60+
# CHECK-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
61+
# CHECK-NO-FINI-NOT: (FINI)
62+
# CHECK-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
63+
# CHECK-NO-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
64+
# CHECK-NO-FINI: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI_ADDR:[[:xdigit:]]+]]
65+
# CHECK-NO-FINI: Symbol table '.symtab' contains {{.*}} entries:
66+
# CHECK-NO-FINI: {{0+}}[[FINI_ADDR]] {{.*}} __bolt_runtime_fini
67+
68+
## Check that the static relocation in .fini_array is patched even for PIE
69+
# CHECK-NO-FINI-RELOC: Dynamic section at offset {{.*}} contains {{.*}} entries:
70+
# CHECK-NO-FINI-RELOC: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
71+
# CHECK-NO-FINI-RELOC: Symbol table '.symtab' contains {{.*}} entries:
72+
## Read bytes separately so we can reverse them later
73+
# CHECK-NO-FINI-RELOC: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini
74+
# CHECK-NO-FINI-RELOC: Hex dump of section '.fini_array':
75+
# CHECK-NO-FINI-RELOC: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000
76+
77+
## Check that DT_FINI_ARRAY has static relocation applied for __bolt_runtime_fini
78+
# CHECK-NO-PIE-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
79+
# CHECK-NO-PIE-NO-FINI-NOT: (FINI)
80+
# CHECK-NO-PIE-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[a-f0-9]+]]
81+
# CHECK-NO-PIE-NO-FINI: Symbol table '.symtab' contains {{.*}} entries:
82+
## Read address bytes separately so we can reverse them later
83+
# CHECK-NO-PIE-NO-FINI: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini
84+
# CHECK-NO-PIE-NO-FINI: Hex dump of section '.fini_array':
85+
# CHECK-NO-PIE-NO-FINI: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000
86+
87+
.globl _start
88+
.type _start, %function
89+
_start:
90+
# Dummy relocation to force relocation mode.
91+
.reloc 0, R_AARCH64_NONE
92+
ret
93+
.size _start, .-_start
94+
95+
.globl _fini
96+
.type _fini, %function
97+
_fini:
98+
ret
99+
.size _fini, .-_fini
100+
101+
.section .fini_array,"aw"
102+
.align 3
103+
.dword _fini
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Test the different ways of hooking the fini function for instrumentation (via
2+
# DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE
3+
# binaries because of the different ways of handling relocations (static or
4+
# dynamic).
5+
# All tests perform the following steps:
6+
# - Compile and link for the case to be tested
7+
# - Some sanity-checks on the dynamic section and relocations in the binary to
8+
# verify it has the shape we want for testing:
9+
# - DT_FINI or DT_FINI_ARRAY in dynamic section
10+
# - No relative relocations for non-PIE
11+
# - Instrument
12+
# - Run instrumented binary
13+
# - Verify generated profile
14+
REQUIRES: system-linux,bolt-runtime
15+
16+
RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe
17+
RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s
18+
RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
19+
RUN: llvm-bolt %t.exe -o %t --instrument \
20+
RUN: --instrumentation-file=%t \
21+
RUN: --instrumentation-file-append-pid
22+
RUN: rm -f %t.*.fdata
23+
RUN: %t
24+
RUN: cat %t.*.fdata | FileCheck %s
25+
26+
RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-fini.exe
27+
RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
28+
RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s
29+
RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument \
30+
RUN: --instrumentation-file=%t-no-fini \
31+
RUN: --instrumentation-file-append-pid
32+
RUN: rm -f %t-no-fini.*.fdata
33+
RUN: %t-no-fini
34+
RUN: cat %t-no-fini.*.fdata | FileCheck %s
35+
36+
RUN: %clang %cflags -no-pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-pie-no-fini.exe
37+
RUN: llvm-readelf -d %t-no-pie-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
38+
RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
39+
RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument \
40+
RUN: --instrumentation-file=%t-no-pie-no-fini \
41+
RUN: --instrumentation-file-append-pid
42+
RUN: rm -f %t-no-pie-no-fini.*.fdata
43+
RUN: %t-no-pie-no-fini
44+
RUN: cat %t-no-pie-no-fini.*.fdata | FileCheck %s
45+
46+
# With fini: dynamic section should contain DT_FINI
47+
DYN-FINI: (FINI)
48+
49+
# Without fini: dynamic section should only contain DT_FINI_ARRAY
50+
DYN-NO-FINI-NOT: (FINI)
51+
DYN-NO-FINI: (FINI_ARRAY)
52+
DYN-NO-FINI: (FINI_ARRAYSZ)
53+
54+
# With PIE: binary should have relative relocations
55+
RELOC-PIE: R_AARCH64_RELATIVE
56+
57+
# Without PIE: binary should not have relative relocations
58+
RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE
59+
60+
# The instrumented profile should at least say main was called once
61+
CHECK: main 0 0 1{{$}}

0 commit comments

Comments
 (0)