Skip to content

Commit 17ed8f2

Browse files
committed
[BOLT][AArch64] Handle adrp+ld64 linker relaxations
The linker might relax an adrp + ldr GOT address load to adrp + add for local non-preemptible symbols (e.g. hidden/protected symbols in an executable). Since the linker usually doesn't update relocations properly after relaxation, we have to handle such cases ourselves. To do that, during relocation reading we change the LD64 relocation to ADD if an instruction mismatch is found, and we introduce FixRelaxationPass, which searches for ADRP+ADD pairs and, after performing some checks, replaces the ADRP target symbol with that of the already-fixed ADD. Vladislav Khmelevsky, Advanced Software Technology Lab, Huawei Differential Revision: https://reviews.llvm.org/D138097
1 parent 63173d1 commit 17ed8f2

12 files changed

+374
-13
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,6 +1420,11 @@ class MCPlusBuilder {
14201420
llvm_unreachable("not implemented");
14211421
}
14221422

1423+
virtual bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const {
1424+
llvm_unreachable("not implemented");
1425+
return false;
1426+
}
1427+
14231428
virtual int getShortJmpEncodingSize() const {
14241429
llvm_unreachable("not implemented");
14251430
}

bolt/include/bolt/Core/Relocation.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@ struct Relocation {
5858
/// Skip relocations that we don't want to handle in BOLT
5959
static bool skipRelocationType(uint64_t Type);
6060

61-
/// Handle special cases when relocation should not be processed by BOLT
62-
static bool skipRelocationProcess(uint64_t Type, uint64_t Contents);
61+
/// Handle special cases when relocation should not be processed by BOLT or
62+
/// change relocation \p Type to proper one before continuing if \p Contents
63+
/// and \p Type mismatch occurred.
64+
static bool skipRelocationProcess(uint64_t &Type, uint64_t Contents);
6365

6466
// Adjust value depending on relocation type (make it PC relative or not)
6567
static uint64_t adjustValue(uint64_t Type, uint64_t Value,
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//===- bolt/Passes/FixRelaxationPass.h --------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file declares the FixRelaxations class, which locates instructions with
10+
// wrong targets and fixes them. Such problems usually occur when the linker
11+
// relaxes (changes) instructions, but doesn't fix relocation types properly
12+
// for them.
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
#ifndef BOLT_PASSES_FIXRELAXATIONPASS_H
17+
#define BOLT_PASSES_FIXRELAXATIONPASS_H
18+
19+
#include "bolt/Passes/BinaryPasses.h"
20+
21+
namespace llvm {
22+
namespace bolt {
23+
24+
class FixRelaxations : public BinaryFunctionPass {
25+
void runOnFunction(BinaryFunction &Function);
26+
27+
public:
28+
explicit FixRelaxations(const cl::opt<bool> &PrintPass)
29+
: BinaryFunctionPass(PrintPass) {}
30+
31+
const char *getName() const override { return "fix-relaxations"; }
32+
33+
/// Pass entry point
34+
void runOnFunctions(BinaryContext &BC) override;
35+
};
36+
37+
} // namespace bolt
38+
} // namespace llvm
39+
40+
#endif

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ class RewriteInstance {
247247
/// The \p SymbolName, \p SymbolAddress, \p Addend and \p ExtractedValue
248248
/// parameters will be set on success. The \p Skip argument indicates
249249
/// that the relocation was analyzed, but it must not be processed.
250-
bool analyzeRelocation(const object::RelocationRef &Rel, uint64_t RType,
250+
bool analyzeRelocation(const object::RelocationRef &Rel, uint64_t &RType,
251251
std::string &SymbolName, bool &IsSectionRelocation,
252252
uint64_t &SymbolAddress, int64_t &Addend,
253253
uint64_t &ExtractedValue, bool &Skip) const;

bolt/lib/Core/Relocation.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,11 @@ bool skipRelocationTypeAArch64(uint64_t Type) {
171171
return Type == ELF::R_AARCH64_NONE || Type == ELF::R_AARCH64_LD_PREL_LO19;
172172
}
173173

174-
bool skipRelocationProcessX86(uint64_t Type, uint64_t Contents) {
174+
bool skipRelocationProcessX86(uint64_t &Type, uint64_t Contents) {
175175
return false;
176176
}
177177

178-
bool skipRelocationProcessAArch64(uint64_t Type, uint64_t Contents) {
178+
bool skipRelocationProcessAArch64(uint64_t &Type, uint64_t Contents) {
179179
auto IsMov = [](uint64_t Contents) -> bool {
180180
// The bits 28-23 are 0b100101
181181
return (Contents & 0x1f800000) == 0x12800000;
@@ -191,12 +191,25 @@ bool skipRelocationProcessAArch64(uint64_t Type, uint64_t Contents) {
191191
return (Contents & 0x9f000000) == 0x10000000;
192192
};
193193

194+
auto IsAddImm = [](uint64_t Contents) -> bool {
195+
// The bits 30-23 are 0b00100010
196+
return (Contents & 0x7F800000) == 0x11000000;
197+
};
198+
194199
auto IsNop = [](uint64_t Contents) -> bool { return Contents == 0xd503201f; };
195200

196201
// The linker might eliminate the instruction and replace it with NOP, ignore
197202
if (IsNop(Contents))
198203
return true;
199204

205+
// The linker might relax ADRP+LDR instruction sequence for loading symbol
206+
// address from GOT table to ADRP+ADD sequence that would point to the
207+
// binary-local symbol. Change relocation type in order to process it right.
208+
if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
209+
Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
210+
return false;
211+
}
212+
200213
// The linker might perform TLS relocations relaxations, such as
201214
// changed TLS access model (e.g. changed global dynamic model
202215
// to initial exec), thus changing the instructions. The static
@@ -548,7 +561,7 @@ bool Relocation::skipRelocationType(uint64_t Type) {
548561
return skipRelocationTypeX86(Type);
549562
}
550563

551-
bool Relocation::skipRelocationProcess(uint64_t Type, uint64_t Contents) {
564+
bool Relocation::skipRelocationProcess(uint64_t &Type, uint64_t Contents) {
552565
if (Arch == Triple::aarch64)
553566
return skipRelocationProcessAArch64(Type, Contents);
554567
return skipRelocationProcessX86(Type, Contents);

bolt/lib/Passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses
1313
DataflowInfoManager.cpp
1414
FrameAnalysis.cpp
1515
FrameOptimizer.cpp
16+
FixRelaxationPass.cpp
1617
HFSort.cpp
1718
HFSortPlus.cpp
1819
Hugify.cpp

bolt/lib/Passes/FixRelaxationPass.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include "bolt/Passes/FixRelaxationPass.h"
2+
#include "bolt/Core/ParallelUtilities.h"
3+
4+
using namespace llvm;
5+
6+
namespace llvm {
7+
namespace bolt {
8+
9+
// This function finds ADRP+ADD instruction sequences that originally before
10+
// linker relaxations were ADRP+LDR. We've modified LDR/ADD relocation properly
11+
// during relocation reading, so it targets the right symbol. As for ADRP, its
12+
// target is wrong before this pass since we won't be able to recognize and
13+
// properly change R_AARCH64_ADR_GOT_PAGE relocation to
14+
// R_AARCH64_ADR_PREL_PG_HI21 during relocation reading. Now we're searching for
15+
// ADRP+ADD sequences, checking that ADRP points to the GOT-table symbol and the
16+
// target of ADD is another symbol. When found change ADRP symbol reference to
17+
// the ADD's one.
18+
void FixRelaxations::runOnFunction(BinaryFunction &BF) {
19+
BinaryContext &BC = BF.getBinaryContext();
20+
for (BinaryBasicBlock &BB : BF) {
21+
for (auto II = BB.begin(); II != BB.end(); ++II) {
22+
MCInst &Adrp = *II;
23+
if (BC.MIB->isPseudo(Adrp) || !BC.MIB->isADRP(Adrp))
24+
continue;
25+
26+
const MCSymbol *AdrpSymbol = BC.MIB->getTargetSymbol(Adrp);
27+
if (!AdrpSymbol || AdrpSymbol->getName() != "__BOLT_got_zero")
28+
continue;
29+
30+
auto NextII = std::next(II);
31+
if (NextII == BB.end())
32+
continue;
33+
34+
const MCInst &Add = *NextII;
35+
if (!BC.MIB->matchAdrpAddPair(Adrp, Add))
36+
continue;
37+
38+
const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Add);
39+
if (!Symbol || AdrpSymbol == Symbol)
40+
continue;
41+
42+
const int64_t Addend = BC.MIB->getTargetAddend(Add);
43+
BC.MIB->setOperandToSymbolRef(Adrp, /*OpNum*/ 1, Symbol, Addend,
44+
BC.Ctx.get(), ELF::R_AARCH64_NONE);
45+
}
46+
}
47+
}
48+
49+
void FixRelaxations::runOnFunctions(BinaryContext &BC) {
50+
if (!BC.isAArch64() || !BC.HasRelocations)
51+
return;
52+
53+
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
54+
runOnFunction(BF);
55+
};
56+
57+
ParallelUtilities::runOnEachFunction(
58+
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, nullptr,
59+
"FixRelaxations");
60+
}
61+
62+
} // namespace bolt
63+
} // namespace llvm

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "bolt/Passes/AllocCombiner.h"
1313
#include "bolt/Passes/AsmDump.h"
1414
#include "bolt/Passes/CMOVConversion.h"
15+
#include "bolt/Passes/FixRelaxationPass.h"
1516
#include "bolt/Passes/FrameOptimizer.h"
1617
#include "bolt/Passes/Hugify.h"
1718
#include "bolt/Passes/IdenticalCodeFolding.h"
@@ -179,6 +180,11 @@ static cl::opt<bool>
179180
PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
180181
cl::cat(BoltOptCategory));
181182

183+
static cl::opt<bool>
184+
PrintFixRelaxations("print-fix-relaxations",
185+
cl::desc("print functions after fix relaxations pass"),
186+
cl::cat(BoltOptCategory));
187+
182188
static cl::opt<bool> PrintVeneerElimination(
183189
"print-veneer-elimination",
184190
cl::desc("print functions after veneer elimination pass"),
@@ -315,9 +321,12 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
315321
Manager.registerPass(std::make_unique<AsmDumpPass>(),
316322
opts::AsmDump.getNumOccurrences());
317323

318-
if (BC.isAArch64())
324+
if (BC.isAArch64()) {
325+
Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));
326+
319327
Manager.registerPass(
320328
std::make_unique<VeneerElimination>(PrintVeneerElimination));
329+
}
321330

322331
// Here we manage dependencies/order manually, since passes are run in the
323332
// order they're registered.

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,7 +1882,7 @@ uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
18821882
} // anonymous namespace
18831883

18841884
bool RewriteInstance::analyzeRelocation(
1885-
const RelocationRef &Rel, uint64_t RType, std::string &SymbolName,
1885+
const RelocationRef &Rel, uint64_t &RType, std::string &SymbolName,
18861886
bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend,
18871887
uint64_t &ExtractedValue, bool &Skip) const {
18881888
Skip = false;
@@ -2554,7 +2554,8 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
25542554
}
25552555

25562556
if (ForceRelocation) {
2557-
std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName;
2557+
std::string Name =
2558+
Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
25582559
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
25592560
SymbolAddress = 0;
25602561
if (Relocation::isGOT(RType))

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
6363
return Inst.getOpcode() == AArch64::ADR;
6464
}
6565

66+
bool isAddXri(const MCInst &Inst) const {
67+
return Inst.getOpcode() == AArch64::ADDXri;
68+
}
69+
6670
void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const override {
6771
assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction");
6872
assert(MCPlus::getNumPrimeOperands(Inst) != 0 &&
@@ -365,12 +369,11 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
365369

366370
// Auto-select correct operand number
367371
if (OpNum == 0) {
368-
if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst))
372+
if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst) ||
373+
isMOVW(Inst))
369374
OpNum = 1;
370-
if (isTB(Inst))
375+
if (isTB(Inst) || isAddXri(Inst))
371376
OpNum = 2;
372-
if (isMOVW(Inst))
373-
OpNum = 1;
374377
}
375378

376379
return true;
@@ -1072,6 +1075,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
10721075
return 3;
10731076
}
10741077

1078+
bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const override {
1079+
if (!isADRP(Adrp) || !isAddXri(Add))
1080+
return false;
1081+
1082+
assert(Adrp.getOperand(0).isReg() &&
1083+
"Unexpected operand in ADRP instruction");
1084+
MCPhysReg AdrpReg = Adrp.getOperand(0).getReg();
1085+
assert(Add.getOperand(1).isReg() &&
1086+
"Unexpected operand in ADDXri instruction");
1087+
MCPhysReg AddReg = Add.getOperand(1).getReg();
1088+
return AdrpReg == AddReg;
1089+
}
1090+
10751091
bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol,
10761092
int64_t Addend, MCContext *Ctx, int64_t &Value,
10771093
uint64_t RelType) const override {

0 commit comments

Comments
 (0)