Skip to content

Commit 5771c98

Browse files
committed
[XRay] Change xray_instr_map sled addresses from absolute to PC relative for x86-64
xray_instr_map contains absolute addresses of sleds, which are relocated by `R_*_RELATIVE` when linked in -pie or -shared mode. By making these addresses relative to PC, we can avoid the dynamic relocations and remove the SHF_WRITE flag from xray_instr_map. We can thus save VM pages containing xray_instr_map (because they are not modified). This patch changes x86-64 and bumps the sled version to 2. Subsequent changes will change powerpc64le and AArch64. Reviewed By: dberris, ianlevesque Differential Revision: https://reviews.llvm.org/D78082
1 parent 44a8c54 commit 5771c98

File tree

10 files changed

+105
-64
lines changed

10 files changed

+105
-64
lines changed

compiler-rt/lib/xray/xray_interface.cpp

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -264,14 +264,14 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
264264
// now we're assuming we can mprotect the whole section of text between the
265265
// minimum sled address and the maximum sled address (+ the largest sled
266266
// size).
267-
auto MinSled = InstrMap.Sleds[0];
268-
auto MaxSled = InstrMap.Sleds[InstrMap.Entries - 1];
267+
auto *MinSled = &InstrMap.Sleds[0];
268+
auto *MaxSled = &InstrMap.Sleds[InstrMap.Entries - 1];
269269
for (std::size_t I = 0; I < InstrMap.Entries; I++) {
270270
const auto &Sled = InstrMap.Sleds[I];
271-
if (Sled.Address < MinSled.Address)
272-
MinSled = Sled;
273-
if (Sled.Address > MaxSled.Address)
274-
MaxSled = Sled;
271+
if (Sled.address() < MinSled->address())
272+
MinSled = &Sled;
273+
if (Sled.address() > MaxSled->address())
274+
MaxSled = &Sled;
275275
}
276276

277277
const size_t PageSize = flags()->xray_page_size_override > 0
@@ -283,9 +283,10 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
283283
}
284284

285285
void *PageAlignedAddr =
286-
reinterpret_cast<void *>(MinSled.Address & ~(PageSize - 1));
286+
reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1));
287287
size_t MProtectLen =
288-
(MaxSled.Address - reinterpret_cast<uptr>(PageAlignedAddr)) + cSledLength;
288+
(MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) +
289+
cSledLength;
289290
MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
290291
if (Protector.MakeWriteable() == -1) {
291292
Report("Failed mprotect: %d\n", errno);
@@ -337,20 +338,21 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
337338
auto SledRange = InstrMap.SledsIndex[FuncId - 1];
338339
auto *f = SledRange.Begin;
339340
auto *e = SledRange.End;
340-
auto MinSled = *f;
341-
auto MaxSled = *(SledRange.End - 1);
341+
auto *MinSled = f;
342+
auto *MaxSled = (SledRange.End - 1);
342343
while (f != e) {
343-
if (f->Address < MinSled.Address)
344-
MinSled = *f;
345-
if (f->Address > MaxSled.Address)
346-
MaxSled = *f;
344+
if (f->address() < MinSled->address())
345+
MinSled = f;
346+
if (f->address() > MaxSled->address())
347+
MaxSled = f;
347348
++f;
348349
}
349350

350351
void *PageAlignedAddr =
351-
reinterpret_cast<void *>(MinSled.Address & ~(PageSize - 1));
352+
reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1));
352353
size_t MProtectLen =
353-
(MaxSled.Address - reinterpret_cast<uptr>(PageAlignedAddr)) + cSledLength;
354+
(MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) +
355+
cSledLength;
354356
MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
355357
if (Protector.MakeWriteable() == -1) {
356358
Report("Failed mprotect: %d\n", errno);

compiler-rt/lib/xray/xray_interface_internal.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,26 @@ struct XRaySledEntry {
2929
unsigned char AlwaysInstrument;
3030
unsigned char Version;
3131
unsigned char Padding[13]; // Need 32 bytes
32+
uint64_t address() const {
33+
#ifndef __x86_64__
34+
// R_MIPS_PC64 does not exist. Use absolute address even for version 2.
35+
return Address;
36+
#else
37+
// TODO Eventually all targets but MIPS64 should take this branch.
38+
if (Version < 2)
39+
return Address;
40+
// The target address is relative to the location of the Address variable.
41+
return reinterpret_cast<uint64_t>(&Address) + Address;
42+
#endif
43+
}
3244
#elif SANITIZER_WORDSIZE == 32
3345
uint32_t Address;
3446
uint32_t Function;
3547
unsigned char Kind;
3648
unsigned char AlwaysInstrument;
3749
unsigned char Version;
3850
unsigned char Padding[5]; // Need 16 bytes
51+
uint32_t address() const { return Address; }
3952
#else
4053
#error "Unsupported word size."
4154
#endif

compiler-rt/lib/xray/xray_x86_64.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
151151
// opcode and first operand.
152152
//
153153
// Prerequisite is to compute the relative offset to the trampoline's address.
154-
const uint64_t Address = Sled.Address;
154+
const uint64_t Address = Sled.address();
155155
int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
156156
(static_cast<int64_t>(Address) + 11);
157157
if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
@@ -197,7 +197,7 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
197197
//
198198
// Prerequisite is to compute the relative offset of the
199199
// __xray_FunctionExit function's address.
200-
const uint64_t Address = Sled.Address;
200+
const uint64_t Address = Sled.address();
201201
int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
202202
(static_cast<int64_t>(Address) + 11);
203203
if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
@@ -225,7 +225,7 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
225225
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
226226
// Here we do the dance of replacing the tail call sled with a similar
227227
// sequence as the entry sled, but calls the tail exit sled instead.
228-
const uint64_t Address = Sled.Address;
228+
const uint64_t Address = Sled.address();
229229
int64_t TrampolineOffset =
230230
reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
231231
(static_cast<int64_t>(Address) + 11);
@@ -270,19 +270,20 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
270270
//
271271
// ---
272272
//
273-
// In Version 1:
273+
// In Version 1 or 2:
274274
//
275275
// The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
276276
// to a jmp, use 15 bytes instead.
277277
//
278-
const uint64_t Address = Sled.Address;
278+
const uint64_t Address = Sled.address();
279279
if (Enable) {
280280
std::atomic_store_explicit(
281281
reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
282282
std::memory_order_release);
283283
} else {
284284
switch (Sled.Version) {
285285
case 1:
286+
case 2:
286287
std::atomic_store_explicit(
287288
reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
288289
std::memory_order_release);
@@ -317,7 +318,7 @@ bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
317318
// unstashes the registers and returns. If the arguments are already in
318319
// the correct registers, the stashing and unstashing become equivalently
319320
// sized nops.
320-
const uint64_t Address = Sled.Address;
321+
const uint64_t Address = Sled.address();
321322
if (Enable) {
322323
std::atomic_store_explicit(
323324
reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,

llvm/include/llvm/CodeGen/AsmPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ class AsmPrinter : public MachineFunctionPass {
286286
const class Function *Fn;
287287
uint8_t Version;
288288

289-
void emit(int, MCStreamer *, const MCSymbol *) const;
289+
void emit(int, MCStreamer *, const MCExpr *, const MCSymbol *) const;
290290
};
291291

292292
// All the sleds to be emitted.

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3188,8 +3188,12 @@ void AsmPrinterHandler::markFunctionEnd() {}
31883188
// describes each instrumentation point. When XRay patches your code, the index
31893189
// into this table will be given to your handler as a patch point identifier.
31903190
void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
3191+
const MCExpr *Location,
31913192
const MCSymbol *CurrentFnSym) const {
3192-
Out->emitSymbolValue(Sled, Bytes);
3193+
if (Location)
3194+
Out->emitValueImpl(Location, Bytes);
3195+
else
3196+
Out->emitSymbolValue(Sled, Bytes);
31933197
Out->emitSymbolValue(CurrentFnSym, Bytes);
31943198
auto Kind8 = static_cast<uint8_t>(Kind);
31953199
Out->emitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
@@ -3209,9 +3213,14 @@ void AsmPrinter::emitXRayTable() {
32093213
const Function &F = MF->getFunction();
32103214
MCSection *InstMap = nullptr;
32113215
MCSection *FnSledIndex = nullptr;
3212-
if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
3216+
const Triple &TT = TM.getTargetTriple();
3217+
// Version 2 uses a PC-relative address on targets already migrated (currently only x86).
3218+
bool PCRel = TT.isX86();
3219+
if (TT.isOSBinFormatELF()) {
32133220
auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
3214-
auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
3221+
auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
3222+
if (!PCRel)
3223+
Flags |= ELF::SHF_WRITE;
32153224
StringRef GroupName;
32163225
if (F.hasComdat()) {
32173226
Flags |= ELF::SHF_GROUP;
@@ -3240,8 +3249,17 @@ void AsmPrinter::emitXRayTable() {
32403249
MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
32413250
OutStreamer->SwitchSection(InstMap);
32423251
OutStreamer->emitLabel(SledsStart);
3243-
for (const auto &Sled : Sleds)
3244-
Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
3252+
for (const auto &Sled : Sleds) {
3253+
const MCExpr *Location = nullptr;
3254+
if (PCRel) {
3255+
MCSymbol *Dot = OutContext.createTempSymbol();
3256+
OutStreamer->emitLabel(Dot);
3257+
Location = MCBinaryExpr::createSub(
3258+
MCSymbolRefExpr::create(Sled.Sled, OutContext),
3259+
MCSymbolRefExpr::create(Dot, OutContext), OutContext);
3260+
}
3261+
Sled.emit(WordSizeBytes, OutStreamer.get(), Location, CurrentFnSym);
3262+
}
32453263
MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
32463264
OutStreamer->emitLabel(SledsEnd);
32473265

llvm/lib/Target/X86/X86MCInstLower.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1508,10 +1508,10 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
15081508

15091509
OutStreamer->AddComment("xray custom event end.");
15101510

1511-
// Record the sled version. Older versions of this sled were spelled
1512-
// differently, so we let the runtime handle the different offsets we're
1513-
// using.
1514-
recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
1511+
// Record the sled version. Version 0 of this sled was spelled differently, so
1512+
// we let the runtime handle the different offsets we're using. Version 2
1513+
// changed the absolute address to a PC-relative address.
1514+
recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
15151515
}
15161516

15171517
void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
@@ -1612,7 +1612,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
16121612
OutStreamer->AddComment("xray typed event end.");
16131613

16141614
// Record the sled version.
1615-
recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
1615+
recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
16161616
}
16171617

16181618
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
@@ -1652,7 +1652,7 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
16521652
// FIXME: Find another less hacky way to force the relative jump.
16531653
OutStreamer->emitBytes("\xeb\x09");
16541654
EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
1655-
recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
1655+
recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
16561656
}
16571657

16581658
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
@@ -1684,7 +1684,7 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
16841684
Ret.addOperand(MaybeOperand.getValue());
16851685
OutStreamer->emitInstruction(Ret, getSubtargetInfo());
16861686
EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
1687-
recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
1687+
recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
16881688
}
16891689

16901690
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
@@ -1708,7 +1708,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
17081708
OutStreamer->emitBytes("\xeb\x09");
17091709
EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
17101710
OutStreamer->emitLabel(Target);
1711-
recordSled(CurSled, MI, SledKind::TAIL_CALL);
1711+
recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
17121712

17131713
unsigned OpCode = MI.getOperand(0).getImm();
17141714
OpCode = convertTailJumpOpcode(OpCode);

llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,12 @@ NotEqual:
4949
}
5050
; CHECK-LABEL: xray_instr_map
5151
; CHECK-LABEL: Lxray_sleds_start1:
52-
; CHECK: .quad {{.*}}xray_sled_2
53-
; CHECK: .quad {{.*}}xray_sled_3
54-
; CHECK: .quad {{.*}}xray_sled_4
52+
; CHECK: Ltmp2:
53+
; CHECK-NEXT: .quad {{.*}}xray_sled_2-{{\.?}}Ltmp2
54+
; CHECK: Ltmp3:
55+
; CHECK-NEXT: .quad {{.*}}xray_sled_3-{{\.?}}Ltmp3
56+
; CHECK: Ltmp4:
57+
; CHECK-NEXT: .quad {{.*}}xray_sled_4-{{\.?}}Ltmp4
5558
; CHECK-LABEL: Lxray_sleds_end1:
5659
; CHECK-LABEL: xray_fn_idx
5760
; CHECK: .quad {{.*}}xray_sleds_start1

llvm/test/CodeGen/X86/xray-log-args.ll

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,33 +7,37 @@ define i32 @callee(i32 %arg) nounwind noinline uwtable "function-instrument"="xr
77
ret i32 %arg
88
}
99
; CHECK-LABEL: Lxray_sleds_start0:
10-
; CHECK: .quad {{\.?}}Lxray_sled_0
11-
; CHECK: .quad {{_?}}callee
12-
; CHECK: .byte 0x03
13-
; CHECK: .byte 0x01
14-
; CHECK: .byte 0x00
15-
; CHECK: .{{(zero|space)}} 13
16-
; CHECK: .quad {{\.?}}Lxray_sled_1
17-
; CHECK: .quad {{_?}}callee
18-
; CHECK: .byte 0x01
19-
; CHECK: .byte 0x01
20-
; CHECK: .byte 0x00
10+
; CHECK-NEXT: Ltmp0:
11+
; CHECK-NEXT: .quad {{\.?}}Lxray_sled_0-{{\.?}}Ltmp0
12+
; CHECK-NEXT: .quad {{_?}}callee
13+
; CHECK-NEXT: .byte 0x03
14+
; CHECK-NEXT: .byte 0x01
15+
; CHECK-NEXT: .byte 0x02
16+
; CHECK: .{{(zero|space)}} 13
17+
; CHECK: Ltmp1:
18+
; CHECK-NEXT: .quad {{\.?}}Lxray_sled_1-{{\.?}}Ltmp1
19+
; CHECK-NEXT: .quad {{_?}}callee
20+
; CHECK-NEXT: .byte 0x01
21+
; CHECK-NEXT: .byte 0x01
22+
; CHECK-NEXT: .byte 0x02
2123
; CHECK: .{{(zero|space)}} 13
2224

2325
define i32 @caller(i32 %arg) nounwind noinline uwtable "function-instrument"="xray-always" "xray-log-args"="1" {
2426
%retval = tail call i32 @callee(i32 %arg)
2527
ret i32 %retval
2628
}
2729
; CHECK-LABEL: Lxray_sleds_start1:
28-
; CHECK: .quad {{\.?}}Lxray_sled_2
29-
; CHECK: .quad {{_?}}caller
30-
; CHECK: .byte 0x03
31-
; CHECK: .byte 0x01
32-
; CHECK: .byte 0x00
30+
; CHECK-NEXT: Ltmp3:
31+
; CHECK-NEXT: .quad {{\.?}}Lxray_sled_2-{{\.?}}Ltmp3
32+
; CHECK-NEXT: .quad {{_?}}caller
33+
; CHECK-NEXT: .byte 0x03
34+
; CHECK-NEXT: .byte 0x01
35+
; CHECK-NEXT: .byte 0x02
3336
; CHECK: .{{(zero|space)}} 13
34-
; CHECK: .quad {{\.?}}Lxray_sled_3
35-
; CHECK: .quad {{_?}}caller
36-
; CHECK: .byte 0x02
37-
; CHECK: .byte 0x01
38-
; CHECK: .byte 0x00
37+
; CHECK: Ltmp4:
38+
; CHECK-NEXT: .quad {{\.?}}Lxray_sled_3-{{\.?}}Ltmp4
39+
; CHECK-NEXT: .quad {{_?}}caller
40+
; CHECK-NEXT: .byte 0x02
41+
; CHECK-NEXT: .byte 0x01
42+
; CHECK-NEXT: .byte 0x02
3943
; CHECK: .{{(zero|space)}} 13

llvm/test/CodeGen/X86/xray-section-group.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
66
; CHECK: .section .text.foo,"ax",@progbits
77
ret i32 0
8-
; CHECK: .section xray_instr_map,"awo",@progbits,foo{{$}}
8+
; CHECK: .section xray_instr_map,"ao",@progbits,foo{{$}}
99
}
1010

1111
$bar = comdat any
1212
define i32 @bar() nounwind noinline uwtable "function-instrument"="xray-always" comdat($bar) {
1313
; CHECK: .section .text.bar,"axG",@progbits,bar,comdat
1414
ret i32 1
15-
; CHECK: .section xray_instr_map,"aGwo",@progbits,bar,comdat,bar{{$}}
15+
; CHECK: .section xray_instr_map,"aGo",@progbits,bar,comdat,bar{{$}}
1616
}
1717

1818
; CHECK-OBJ: section xray_instr_map:

llvm/test/DebugInfo/X86/xray-split-dwarf-interaction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525
; `a::b()` is actually associated with the function's symbol instead of the
2626
; .debug_types.dwo section.
2727
;
28-
; CHECK-ASM: xray_fn_idx,"awo",@progbits,_ZN1a1bEv{{$}}
28+
; CHECK-ASM: xray_fn_idx,"ao",@progbits,_ZN1a1bEv{{$}}
2929
;
3030
; CHECK-ELF-DAG: [[FSECT:[0-9]+]]] .text._ZN1a1bEv PROGBITS
3131
; CHECK-ELF-DAG: [{{.*}}] .debug_types.dwo PROGBITS
32-
; CHECK-ELF-DAG: [{{.*}}] xray_instr_map PROGBITS {{.*}} {{.*}} {{.*}} {{.*}} WAL [[FSECT]]
32+
; CHECK-ELF-DAG: [{{.*}}] xray_instr_map PROGBITS {{.*}} {{.*}} {{.*}} {{.*}} AL [[FSECT]]
3333
target triple = "x86_64-pc-linux"
3434

3535
%class.a = type { i8 }

0 commit comments

Comments
 (0)