-
Notifications
You must be signed in to change notification settings - Fork 14k
[LLD][MachO][NFC] Refactor LOH code #141153
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lld Author: Ellis Hoag (ellishg) ChangesIn Full diff: https://github.com/llvm/llvm-project/pull/141153.diff 1 Files Affected:
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 849b309edeb26..c882e106e5b07 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -15,7 +15,6 @@
#include "lld/Common/ErrorHandler.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
@@ -393,25 +392,26 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
// ->
// adr xM, _foo
// nop
-static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp;
Add add;
if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
- return;
+ return false;
if (adrp.destRegister != add.srcRegister)
- return;
+ return false;
uint64_t addr1 = isec->getVA() + offset1;
uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
int64_t delta = referent - addr1;
if (!isValidAdrOffset(delta))
- return;
+ return false;
writeAdr(buf + offset1, add.destRegister, delta);
writeNop(buf + offset2);
+ return true;
}
// Transforms two adrp instructions into a single adrp if their referent
@@ -496,16 +496,12 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2,
uint64_t offset3) {
uint32_t ins1 = read32le(buf + offset1);
- Adrp adrp;
- if (!parseAdrp(ins1, adrp))
- return;
uint32_t ins2 = read32le(buf + offset2);
- Add add;
- if (!parseAdd(ins2, add))
- return;
uint32_t ins3 = read32le(buf + offset3);
+ Adrp adrp;
+ Add add;
Ldr ldr;
- if (!parseLdr(ins3, ldr))
+ if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
return;
if (adrp.destRegister != add.srcRegister)
return;
@@ -528,18 +524,8 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
return;
}
- // Load the target address into a register and load from there indirectly.
- // adr x1, _foo
- // nop
- // ldr x2, [x1, #off]
- int64_t adrOffset = referent - addr1;
- if (isValidAdrOffset(adrOffset)) {
- writeAdr(buf + offset1, ldr.baseRegister, adrOffset);
- // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
- // not for AdrpLdrGotLdr. Its effect is the same either way.
- writeNop(buf + offset2);
+ if (applyAdrpAdd(buf, isec, offset1, offset2))
return;
- }
// Move the target's page offset into the ldr's immediate offset.
// adrp x0, _foo@PAGE
@@ -575,12 +561,10 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
// ldr x3, [x2, #off]
uint32_t ins1 = read32le(buf + offset1);
- Adrp adrp;
- if (!parseAdrp(ins1, adrp))
- return;
uint32_t ins3 = read32le(buf + offset3);
+ Adrp adrp;
Ldr ldr3;
- if (!parseLdr(ins3, ldr3))
+ if (!parseAdrp(ins1, adrp) || !parseLdr(ins3, ldr3))
return;
if (ldr2.baseRegister != adrp.destRegister)
@@ -607,33 +591,32 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
}
}
-static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
- unsigned int n = 0;
- uint64_t value = decodeULEB128(ptr, &n, end);
- ptr += n;
- return value;
-}
-
template <typename Callback>
static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
std::array<uint64_t, 3> args;
- for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
- uint64_t type = readValue(p, end);
+ auto readNext = [&]() -> uint64_t {
+ unsigned int n = 0;
+ uint64_t value = decodeULEB128(data.data(), &n, data.end());
+ data = data.drop_front(n);
+ return value;
+ };
+
+ while (!data.empty()) {
+ uint64_t type = readNext();
if (type == 0)
break;
- uint64_t argCount = readValue(p, end);
+ uint64_t argCount = readNext();
+ for (unsigned i = 0; i < argCount; ++i) {
+ uint64_t arg = readNext();
+ if (i < 3)
+ args[i] = arg;
+ }
// All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
- if (argCount > 3) {
- for (unsigned i = 0; i < argCount; ++i)
- readValue(p, end);
+ if (argCount > 3)
continue;
- }
-
- for (unsigned i = 0; i < argCount; ++i)
- args[i] = readValue(p, end);
- callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+ callback(type, ArrayRef(args.data(), argCount));
}
}
|
@llvm/pr-subscribers-lld-macho Author: Ellis Hoag (ellishg) ChangesIn Full diff: https://github.com/llvm/llvm-project/pull/141153.diff 1 Files Affected:
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 849b309edeb26..c882e106e5b07 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -15,7 +15,6 @@
#include "lld/Common/ErrorHandler.h"
#include "mach-o/compact_unwind_encoding.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
@@ -393,25 +392,26 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
// ->
// adr xM, _foo
// nop
-static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp;
Add add;
if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
- return;
+ return false;
if (adrp.destRegister != add.srcRegister)
- return;
+ return false;
uint64_t addr1 = isec->getVA() + offset1;
uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
int64_t delta = referent - addr1;
if (!isValidAdrOffset(delta))
- return;
+ return false;
writeAdr(buf + offset1, add.destRegister, delta);
writeNop(buf + offset2);
+ return true;
}
// Transforms two adrp instructions into a single adrp if their referent
@@ -496,16 +496,12 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2,
uint64_t offset3) {
uint32_t ins1 = read32le(buf + offset1);
- Adrp adrp;
- if (!parseAdrp(ins1, adrp))
- return;
uint32_t ins2 = read32le(buf + offset2);
- Add add;
- if (!parseAdd(ins2, add))
- return;
uint32_t ins3 = read32le(buf + offset3);
+ Adrp adrp;
+ Add add;
Ldr ldr;
- if (!parseLdr(ins3, ldr))
+ if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
return;
if (adrp.destRegister != add.srcRegister)
return;
@@ -528,18 +524,8 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
return;
}
- // Load the target address into a register and load from there indirectly.
- // adr x1, _foo
- // nop
- // ldr x2, [x1, #off]
- int64_t adrOffset = referent - addr1;
- if (isValidAdrOffset(adrOffset)) {
- writeAdr(buf + offset1, ldr.baseRegister, adrOffset);
- // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
- // not for AdrpLdrGotLdr. Its effect is the same either way.
- writeNop(buf + offset2);
+ if (applyAdrpAdd(buf, isec, offset1, offset2))
return;
- }
// Move the target's page offset into the ldr's immediate offset.
// adrp x0, _foo@PAGE
@@ -575,12 +561,10 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
// ldr x3, [x2, #off]
uint32_t ins1 = read32le(buf + offset1);
- Adrp adrp;
- if (!parseAdrp(ins1, adrp))
- return;
uint32_t ins3 = read32le(buf + offset3);
+ Adrp adrp;
Ldr ldr3;
- if (!parseLdr(ins3, ldr3))
+ if (!parseAdrp(ins1, adrp) || !parseLdr(ins3, ldr3))
return;
if (ldr2.baseRegister != adrp.destRegister)
@@ -607,33 +591,32 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
}
}
-static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
- unsigned int n = 0;
- uint64_t value = decodeULEB128(ptr, &n, end);
- ptr += n;
- return value;
-}
-
template <typename Callback>
static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
std::array<uint64_t, 3> args;
- for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
- uint64_t type = readValue(p, end);
+ auto readNext = [&]() -> uint64_t {
+ unsigned int n = 0;
+ uint64_t value = decodeULEB128(data.data(), &n, data.end());
+ data = data.drop_front(n);
+ return value;
+ };
+
+ while (!data.empty()) {
+ uint64_t type = readNext();
if (type == 0)
break;
- uint64_t argCount = readValue(p, end);
+ uint64_t argCount = readNext();
+ for (unsigned i = 0; i < argCount; ++i) {
+ uint64_t arg = readNext();
+ if (i < 3)
+ args[i] = arg;
+ }
// All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
- if (argCount > 3) {
- for (unsigned i = 0; i < argCount; ++i)
- readValue(p, end);
+ if (argCount > 3)
continue;
- }
-
- for (unsigned i = 0; i < argCount; ++i)
- args[i] = readValue(p, end);
- callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+ callback(type, ArrayRef(args.data(), argCount));
}
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
In `applyAdrpAddLdr()` we make a transformation that is identical to the one in `applyAdrpAdd()`, so lets reuse that code. Also refactor `forEachHint()` to use more `ArrayRef` and move around some lines for consistancy.
In
applyAdrpAddLdr()
we make a transformation that is identical to the one inapplyAdrpAdd()
, so lets reuse that code. Also refactorforEachHint()
to use moreArrayRef
and move around some lines for consistancy.