Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
28a20b4
[VectorCombine] Avoid inserting freeze when scalarizing extend-extrac…
juliannagele Nov 4, 2025
0307147
[NFC][SPIRV] Add AMDGCN SPIR-V specific defaults to the BE (#165815)
AlexVlx Nov 4, 2025
747050b
[MLIR][XeGPU][VectorToXeGPU] Lower vector.load/store/transfer_read/tr…
dchigarev Nov 4, 2025
8a84b28
[SimplifyCFG] Eliminate dead edges of switches according to the domai…
dtcxzyw Nov 4, 2025
f771f1e
[llvm-dwarfdump][CMake] Link against BinaryFormat (#166364)
Michael137 Nov 4, 2025
2e89b71
[C23] Correctly handle missing embed with -MG (#166188)
AaronBallman Nov 4, 2025
89c2617
[X86] bittest-big-integer.ll - add test showing multiple uses of the …
RKSimon Nov 4, 2025
12f392c
[AArch64][SME] Support `aarch64-split-sve-objects` with VLAs/realignm…
MacDue Nov 4, 2025
389aa46
[mlir][DataFlow] Add visitBlockTransfer hook to dense analyses (#166263)
fabianmcg Nov 4, 2025
ed45c05
[MLIR][XeGPU] fix load/store/prefetch op offset verifier (#166137)
tkarna Nov 4, 2025
a01e4da
[PowerPC] Ensure correct codgen for MMA functions for cpu=future (#16…
lei137 Nov 4, 2025
2237a18
[SPIRV] Enable OpenCL max_work_group_size translation via `SPV_INTEL_…
AlexVlx Nov 4, 2025
bdf0248
[clang][Driver] Fix crash in --offload-new-driver and -save-temps. (#…
mgcarrasco Nov 4, 2025
718a3b2
[BOLT][AArch64] Run LDR relaxation (#165787)
yozhu Nov 4, 2025
b258681
[NFC] add LLVM_ABI to function getMemcmp declaration (#166192)
diggerlin Nov 4, 2025
3170345
[X86] Fix LEA compression on 64 bit (#166334)
tomershafir Nov 4, 2025
5ba746d
[NFC][TableGen] Use namespace qualifier to define `RecordKeeperImpl` …
jurahul Nov 4, 2025
a2495ff
[NFC][TableGen] Emit empty lines after/before namespace scope (#166217)
jurahul Nov 4, 2025
c2269c8
[ADT] Move llvm::to_address to STLForwardCompat.h (NFC) (#166315)
kazutakahirata Nov 4, 2025
3700587
[BinaryFormat] Remove redundant declarations (NFC) (#166316)
kazutakahirata Nov 4, 2025
502742b
[llvm] Proofread MergeFunctions.rst (#166317)
kazutakahirata Nov 4, 2025
50faea2
[llvm] Use conventional enum declarations (NFC) (#166318)
kazutakahirata Nov 4, 2025
bcb3d2f
build_llvm_release.bat fixes (#166385)
zmodem Nov 4, 2025
2286118
[SPIRV] Enable `bfloat16` arithmetic (#166031)
AlexVlx Nov 4, 2025
71022d1
[Clang] [Docs] Add some CMake example code for linking against libcla…
Sirraide Nov 4, 2025
6c563dc
[mlir][XeGPU] Add optional layout attribute to LoadGather StoreScatte…
dchigarev Nov 4, 2025
a50d036
[NFC] [Build Fix] Fix failing test case due to missing host arch. (#1…
mgcarrasco Nov 4, 2025
4749bf5
[lldb] When starting in a hidden frame, don't skip over hidden frames…
Michael137 Nov 4, 2025
ed7d6c3
[ADT] Deprecate a soft-deprecated APInt constructor (#166314)
kazutakahirata Nov 4, 2025
78769d5
[LLDB] Don't check for libcxx if LLDB_ENFORCE_STRICT_TEST_REQUIREMENT…
adrian-prantl Nov 4, 2025
cc3ad20
[analyzer] Revert incorrect LazyCoumpoundVal changes (#163461)
marco-antognini-sonarsource Nov 4, 2025
2dc0fa1
[Flang] Nested directives are comments (#166348)
Meinersbur Nov 4, 2025
8d8e9eb
merge main into amd-staging
ronlieb Nov 4, 2025
0e0ec98
Revert "[C2y] Support WG14 N3457, the __COUNTER__ macro (#162662)"
ronlieb Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,16 @@ class MCPlusBuilder {
return false;
}

/// Return true if \p Inst is a 32-bit LDR (literal) instruction.
///
/// The base implementation is a stub that aborts via llvm_unreachable; a
/// target that supports this query (e.g. AArch64) overrides it.
virtual bool isLDRWl(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}

/// Return true if \p Inst is a 64-bit LDR (literal) instruction.
///
/// The base implementation is a stub that aborts via llvm_unreachable; a
/// target that supports this query (e.g. AArch64) overrides it.
virtual bool isLDRXl(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}

virtual bool isMOVW(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
Expand Down Expand Up @@ -1789,6 +1799,19 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}

/// Take \p LDRInst, an LDR (literal) instruction, and return an ADRP+LDR
/// instruction sequence that loads from the same target. For
///
///   ldr x0, [label]
///
/// the following sequence will be generated:
///
///   adrp x0, PageBase(label)
///   ldr  x0, [x0, PageOffset(label)]
///
/// \p Ctx is forwarded to the code that builds the symbol-reference operands.
/// The base implementation is a stub that aborts via llvm_unreachable; targets
/// that support this relaxation override it.
virtual InstructionListType createAdrpLdr(const MCInst &LDRInst,
MCContext *Ctx) const {
llvm_unreachable("not implemented");
}

/// Return not 0 if the instruction CurInst, in combination with the recent
/// history of disassembled instructions supplied by [Begin, End), is a linker
/// generated veneer/stub that needs patching. This happens in AArch64 when
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
//===- bolt/Passes/ADRRelaxationPass.h --------------------------*- C++ -*-===//
//===- bolt/Passes/AArch64RelaxationPass.h ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the ADRRelaxationPass class, which replaces AArch64
// non-local ADR instructions with ADRP + ADD due to small offset range of ADR
// instruction (+- 1MB) which could be easily overflowed after BOLT
// optimizations. Such problems are usually connected with errata 843419
// https://developer.arm.com/documentation/epm048406/2100/
// This file declares the AArch64RelaxationPass class, which replaces AArch64
// non-local ADR/LDR instructions with ADRP + ADD/LDR. The ADR and LDR
// (literal) instructions have a small offset range (+- 1MB) that can easily
// overflow after BOLT optimizations. Such problems are usually connected with
// errata 843419: https://developer.arm.com/documentation/epm048406/2100/
// The linker could replace ADRP instruction with ADR in some cases.
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_ADRRELAXATIONPASS_H
#define BOLT_PASSES_ADRRELAXATIONPASS_H
#ifndef BOLT_PASSES_AARCH64RELAXATIONPASS_H
#define BOLT_PASSES_AARCH64RELAXATIONPASS_H

#include "bolt/Passes/BinaryPasses.h"

namespace llvm {
namespace bolt {

class ADRRelaxationPass : public BinaryFunctionPass {
class AArch64RelaxationPass : public BinaryFunctionPass {
public:
explicit ADRRelaxationPass(const cl::opt<bool> &PrintPass)
explicit AArch64RelaxationPass(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}

const char *getName() const override { return "adr-relaxation"; }
const char *getName() const override { return "aarch64-relaxation"; }

/// Pass entry point
Error runOnFunctions(BinaryContext &BC) override;
Expand Down
2 changes: 1 addition & 1 deletion bolt/include/bolt/Passes/FixRelaxationPass.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- bolt/Passes/ADRRelaxationPass.h --------------------------*- C++ -*-===//
//===- bolt/Passes/FixRelaxationPass.h --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
//===- bolt/Passes/ADRRelaxationPass.cpp ----------------------------------===//
//===- bolt/Passes/AArch64RelaxationPass.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ADRRelaxationPass class.
// This file implements the AArch64RelaxationPass class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ADRRelaxationPass.h"
#include "bolt/Passes/AArch64RelaxationPass.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <iterator>
Expand All @@ -20,10 +20,10 @@ using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltCategory;

static cl::opt<bool>
AdrPassOpt("adr-relaxation",
cl::desc("Replace ARM non-local ADR instructions with ADRP"),
cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden);
static cl::opt<bool> AArch64PassOpt(
"aarch64-relaxation",
cl::desc("Replace ARM non-local ADR/LDR instructions with ADRP"),
cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden);
} // namespace opts

namespace llvm {
Expand All @@ -35,18 +35,21 @@ namespace bolt {
// jobs and checking the exit flag after it.
static bool PassFailed = false;

void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
void AArch64RelaxationPass::runOnFunction(BinaryFunction &BF) {
if (PassFailed)
return;

BinaryContext &BC = BF.getBinaryContext();
for (BinaryBasicBlock &BB : BF) {
for (auto It = BB.begin(); It != BB.end(); ++It) {
MCInst &Inst = *It;
if (!BC.MIB->isADR(Inst))
bool IsADR = BC.MIB->isADR(Inst);

// TODO: Handle other types of LDR (literal, PC-relative) instructions.
if (!IsADR && !BC.MIB->isLDRXl(Inst) && !BC.MIB->isLDRWl(Inst))
continue;

const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst);
const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, IsADR ? 0 : 1);
if (!Symbol)
continue;

Expand All @@ -56,25 +59,27 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
continue;
}

// Don't relax ADR if it points to the same function and is in the main
// fragment and BF initial size is < 1MB.
// Don't relax ADR/LDR if it points to the same function and is in the
// main fragment and BF initial size is < 1MB.
const unsigned OneMB = 0x100000;
if (BF.getSize() < OneMB) {
BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol);
if (TargetBF == &BF && !BB.isSplit())
continue;

// No relaxation needed if ADR references a basic block in the same
// No relaxation needed if ADR/LDR references a basic block in the same
// fragment.
if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol))
if (BB.getFragmentNum() == TargetBB->getFragmentNum())
continue;
}

InstructionListType AdrpAdd;
InstructionListType AdrpMaterialization;
{
auto L = BC.scopeLock();
AdrpAdd = BC.MIB->undoAdrpAddRelaxation(Inst, BC.Ctx.get());
AdrpMaterialization =
IsADR ? BC.MIB->undoAdrpAddRelaxation(Inst, BC.Ctx.get())
: BC.MIB->createAdrpLdr(Inst, BC.Ctx.get());
}

if (It != BB.begin() && BC.MIB->isNoop(*std::prev(It))) {
Expand All @@ -88,18 +93,18 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
// invalidate this offset, so we have to rely on linker-inserted NOP to
// replace it with ADRP, and abort if it is not present.
auto L = BC.scopeLock();
BC.errs() << "BOLT-ERROR: cannot relax ADR in non-simple function "
<< BF << '\n';
BC.errs() << "BOLT-ERROR: cannot relax " << (IsADR ? "ADR" : "LDR")
<< " in non-simple function " << BF << '\n';
PassFailed = true;
return;
}
It = BB.replaceInstruction(It, AdrpAdd);
It = BB.replaceInstruction(It, AdrpMaterialization);
}
}
}

Error ADRRelaxationPass::runOnFunctions(BinaryContext &BC) {
if (!opts::AdrPassOpt || !BC.HasRelocations)
Error AArch64RelaxationPass::runOnFunctions(BinaryContext &BC) {
if (!opts::AArch64PassOpt || !BC.HasRelocations)
return Error::success();

ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
Expand All @@ -108,7 +113,7 @@ Error ADRRelaxationPass::runOnFunctions(BinaryContext &BC) {

ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
"ADRRelaxationPass");
"AArch64RelaxationPass");

if (PassFailed)
return createFatalBOLTError("");
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
add_llvm_library(LLVMBOLTPasses
ADRRelaxationPass.cpp
AArch64RelaxationPass.cpp
Aligner.cpp
AllocCombiner.cpp
AsmDump.cpp
Expand Down
12 changes: 6 additions & 6 deletions bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//

#include "bolt/Rewrite/BinaryPassManager.h"
#include "bolt/Passes/ADRRelaxationPass.h"
#include "bolt/Passes/AArch64RelaxationPass.h"
#include "bolt/Passes/Aligner.h"
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
Expand Down Expand Up @@ -129,10 +129,10 @@ static cl::opt<bool> PrintJTFootprintReduction(
cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
cl::cat(BoltOptCategory));

static cl::opt<bool>
PrintAdrRelaxation("print-adr-relaxation",
cl::desc("print functions after ADR Relaxation pass"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool> PrintAArch64Relaxation(
"print-adr-ldr-relaxation",
cl::desc("print functions after ADR/LDR Relaxation pass"), cl::Hidden,
cl::cat(BoltOptCategory));

static cl::opt<bool>
PrintLongJmp("print-longjmp",
Expand Down Expand Up @@ -517,7 +517,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {

if (BC.isAArch64()) {
Manager.registerPass(
std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation));
std::make_unique<AArch64RelaxationPass>(PrintAArch64Relaxation));

// Tighten branches according to offset differences between branch and
// targets. No extra instructions after this pass, otherwise we may have
Expand Down
42 changes: 42 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ static InstructionListType createIncMemory(MCPhysReg RegTo, MCPhysReg RegTmp) {
atomicAdd(Insts.back(), RegTo, RegTmp);
return Insts;
}

class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
Expand Down Expand Up @@ -583,6 +584,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Inst.getOpcode() == AArch64::ADDXri;
}

/// Return true if \p Inst has the LDRWl opcode, i.e. the 32-bit integer form
/// of LDR (literal).
bool isLDRWl(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::LDRWl;
}

/// Return true if \p Inst has the LDRXl opcode, i.e. the 64-bit integer form
/// of LDR (literal).
bool isLDRXl(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::LDRXl;
}

MCPhysReg getADRReg(const MCInst &Inst) const {
assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction");
assert(MCPlus::getNumPrimeOperands(Inst) != 0 &&
Expand All @@ -602,6 +611,39 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return materializeAddress(Target, Ctx, Reg, Addend);
}

/// Expand the literal load \p LDRInst (LDRXl or LDRWl) into a two-instruction
/// ADRP + LDRXui/LDRWui sequence that references the same target symbol.
///
/// The ADRP destination doubles as the base register of the following load,
/// so no scratch register is introduced: for a 64-bit load it is the load's
/// own destination, for a 32-bit load it is the GPR64 super-register of that
/// destination. The returned list holds the ADRP first and the LDR second.
InstructionListType createAdrpLdr(const MCInst &LDRInst,
                                  MCContext *Ctx) const override {
  const bool Is64BitLoad = isLDRXl(LDRInst);
  assert((Is64BitLoad || isLDRWl(LDRInst)) &&
         "LDR (literal, 32 or 64-bit integer load) instruction expected");

  const MCOperand &DstOperand = LDRInst.getOperand(0);
  assert(DstOperand.isReg() && "unexpected operand in LDR instruction");
  const MCPhysReg DataReg = DstOperand.getReg();

  // The address is materialized in a 64-bit register; widen a W destination
  // to the X register that contains it.
  MCPhysReg AddrReg = DataReg;
  if (!Is64BitLoad)
    AddrReg = static_cast<MCPhysReg>(RegInfo->getMatchingSuperReg(
        DataReg, AArch64::sub_32,
        &RegInfo->getRegClass(AArch64::GPR64RegClassID)));

  // Operand 1 of the literal load carries the target symbol.
  const MCSymbol *Target = getTargetSymbol(LDRInst, 1);
  assert(Target && "missing target symbol in LDR instruction");

  InstructionListType Insts(2);

  // adrp AddrReg, <page of Target>
  MCInst &Adrp = Insts.front();
  Adrp.setOpcode(AArch64::ADRP);
  Adrp.clear();
  Adrp.addOperand(MCOperand::createReg(AddrReg));
  Adrp.addOperand(MCOperand::createImm(0));
  setOperandToSymbolRef(Adrp, /* OpNum */ 1, Target, 0, Ctx,
                        ELF::R_AARCH64_NONE);

  // ldr DataReg, [AddrReg, <low 12 bits of Target>]
  // NOTE(review): two zero immediates follow the registers, and only operand 2
  // is handed to setOperandToSymbolRef. The trailing immediate looks carried
  // over from an ADD-based sequence - confirm LDRXui/LDRWui expect it.
  MCInst &Ldr = Insts.back();
  Ldr.setOpcode(Is64BitLoad ? AArch64::LDRXui : AArch64::LDRWui);
  Ldr.clear();
  Ldr.addOperand(MCOperand::createReg(DataReg));
  Ldr.addOperand(MCOperand::createReg(AddrReg));
  Ldr.addOperand(MCOperand::createImm(0));
  Ldr.addOperand(MCOperand::createImm(0));
  setOperandToSymbolRef(Ldr, /* OpNum */ 2, Target, 0, Ctx,
                        ELF::R_AARCH64_ADD_ABS_LO12_NC);

  return Insts;
}

bool isTB(const MCInst &Inst) const {
return (Inst.getOpcode() == AArch64::TBNZW ||
Inst.getOpcode() == AArch64::TBNZX ||
Expand Down
Loading