Skip to content

[LLD][MachO][NFC] Refactor LOH code #141153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 27, 2025
Merged

Conversation

ellishg
Copy link
Contributor

@ellishg ellishg commented May 22, 2025

In applyAdrpAddLdr() we make a transformation that is identical to the one in applyAdrpAdd(), so lets reuse that code. Also refactor forEachHint() to use more ArrayRef and move around some lines for consistancy.

@ellishg ellishg requested review from BertalanD and alx32 May 22, 2025 22:33
@ellishg ellishg marked this pull request as ready for review May 22, 2025 22:33
@llvmbot
Copy link
Member

llvmbot commented May 22, 2025

@llvm/pr-subscribers-lld

Author: Ellis Hoag (ellishg)

Changes

In applyAdrpAddLdr() we make a transformation that is identical to the one in applyAdrpAdd(), so lets reuse that code. Also refactor forEachHint() to use more ArrayRef and move around some lines for consistancy.


Full diff: https://github.com/llvm/llvm-project/pull/141153.diff

1 Files Affected:

  • (modified) lld/MachO/Arch/ARM64.cpp (+28-45)
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 849b309edeb26..c882e106e5b07 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -15,7 +15,6 @@
 #include "lld/Common/ErrorHandler.h"
 #include "mach-o/compact_unwind_encoding.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
@@ -393,25 +392,26 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
 // ->
 //   adr  xM, _foo
 //   nop
-static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
                          uint64_t offset1, uint64_t offset2) {
   uint32_t ins1 = read32le(buf + offset1);
   uint32_t ins2 = read32le(buf + offset2);
   Adrp adrp;
   Add add;
   if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
-    return;
+    return false;
   if (adrp.destRegister != add.srcRegister)
-    return;
+    return false;
 
   uint64_t addr1 = isec->getVA() + offset1;
   uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
   int64_t delta = referent - addr1;
   if (!isValidAdrOffset(delta))
-    return;
+    return false;
 
   writeAdr(buf + offset1, add.destRegister, delta);
   writeNop(buf + offset2);
+  return true;
 }
 
 // Transforms two adrp instructions into a single adrp if their referent
@@ -496,16 +496,12 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
                             uint64_t offset1, uint64_t offset2,
                             uint64_t offset3) {
   uint32_t ins1 = read32le(buf + offset1);
-  Adrp adrp;
-  if (!parseAdrp(ins1, adrp))
-    return;
   uint32_t ins2 = read32le(buf + offset2);
-  Add add;
-  if (!parseAdd(ins2, add))
-    return;
   uint32_t ins3 = read32le(buf + offset3);
+  Adrp adrp;
+  Add add;
   Ldr ldr;
-  if (!parseLdr(ins3, ldr))
+  if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
     return;
   if (adrp.destRegister != add.srcRegister)
     return;
@@ -528,18 +524,8 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
     return;
   }
 
-  // Load the target address into a register and load from there indirectly.
-  //   adr x1, _foo
-  //   nop
-  //   ldr x2, [x1, #off]
-  int64_t adrOffset = referent - addr1;
-  if (isValidAdrOffset(adrOffset)) {
-    writeAdr(buf + offset1, ldr.baseRegister, adrOffset);
-    // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
-    // not for AdrpLdrGotLdr. Its effect is the same either way.
-    writeNop(buf + offset2);
+  if (applyAdrpAdd(buf, isec, offset1, offset2))
     return;
-  }
 
   // Move the target's page offset into the ldr's immediate offset.
   //   adrp x0, _foo@PAGE
@@ -575,12 +561,10 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
     // ldr  x3, [x2, #off]
 
     uint32_t ins1 = read32le(buf + offset1);
-    Adrp adrp;
-    if (!parseAdrp(ins1, adrp))
-      return;
     uint32_t ins3 = read32le(buf + offset3);
+    Adrp adrp;
     Ldr ldr3;
-    if (!parseLdr(ins3, ldr3))
+    if (!parseAdrp(ins1, adrp) || !parseLdr(ins3, ldr3))
       return;
 
     if (ldr2.baseRegister != adrp.destRegister)
@@ -607,33 +591,32 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
   }
 }
 
-static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
-  unsigned int n = 0;
-  uint64_t value = decodeULEB128(ptr, &n, end);
-  ptr += n;
-  return value;
-}
-
 template <typename Callback>
 static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
   std::array<uint64_t, 3> args;
 
-  for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
-    uint64_t type = readValue(p, end);
+  auto readNext = [&]() -> uint64_t {
+    unsigned int n = 0;
+    uint64_t value = decodeULEB128(data.data(), &n, data.end());
+    data = data.drop_front(n);
+    return value;
+  };
+
+  while (!data.empty()) {
+    uint64_t type = readNext();
     if (type == 0)
       break;
 
-    uint64_t argCount = readValue(p, end);
+    uint64_t argCount = readNext();
+    for (unsigned i = 0; i < argCount; ++i) {
+      uint64_t arg = readNext();
+      if (i < 3)
+        args[i] = arg;
+    }
     // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
-    if (argCount > 3) {
-      for (unsigned i = 0; i < argCount; ++i)
-        readValue(p, end);
+    if (argCount > 3)
       continue;
-    }
-
-    for (unsigned i = 0; i < argCount; ++i)
-      args[i] = readValue(p, end);
-    callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+    callback(type, ArrayRef(args.data(), argCount));
   }
 }
 

@llvmbot
Copy link
Member

llvmbot commented May 22, 2025

@llvm/pr-subscribers-lld-macho

Author: Ellis Hoag (ellishg)

Changes

In applyAdrpAddLdr() we make a transformation that is identical to the one in applyAdrpAdd(), so lets reuse that code. Also refactor forEachHint() to use more ArrayRef and move around some lines for consistancy.


Full diff: https://github.com/llvm/llvm-project/pull/141153.diff

1 Files Affected:

  • (modified) lld/MachO/Arch/ARM64.cpp (+28-45)
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 849b309edeb26..c882e106e5b07 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -15,7 +15,6 @@
 #include "lld/Common/ErrorHandler.h"
 #include "mach-o/compact_unwind_encoding.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
@@ -393,25 +392,26 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
 // ->
 //   adr  xM, _foo
 //   nop
-static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
                          uint64_t offset1, uint64_t offset2) {
   uint32_t ins1 = read32le(buf + offset1);
   uint32_t ins2 = read32le(buf + offset2);
   Adrp adrp;
   Add add;
   if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
-    return;
+    return false;
   if (adrp.destRegister != add.srcRegister)
-    return;
+    return false;
 
   uint64_t addr1 = isec->getVA() + offset1;
   uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
   int64_t delta = referent - addr1;
   if (!isValidAdrOffset(delta))
-    return;
+    return false;
 
   writeAdr(buf + offset1, add.destRegister, delta);
   writeNop(buf + offset2);
+  return true;
 }
 
 // Transforms two adrp instructions into a single adrp if their referent
@@ -496,16 +496,12 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
                             uint64_t offset1, uint64_t offset2,
                             uint64_t offset3) {
   uint32_t ins1 = read32le(buf + offset1);
-  Adrp adrp;
-  if (!parseAdrp(ins1, adrp))
-    return;
   uint32_t ins2 = read32le(buf + offset2);
-  Add add;
-  if (!parseAdd(ins2, add))
-    return;
   uint32_t ins3 = read32le(buf + offset3);
+  Adrp adrp;
+  Add add;
   Ldr ldr;
-  if (!parseLdr(ins3, ldr))
+  if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
     return;
   if (adrp.destRegister != add.srcRegister)
     return;
@@ -528,18 +524,8 @@ static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
     return;
   }
 
-  // Load the target address into a register and load from there indirectly.
-  //   adr x1, _foo
-  //   nop
-  //   ldr x2, [x1, #off]
-  int64_t adrOffset = referent - addr1;
-  if (isValidAdrOffset(adrOffset)) {
-    writeAdr(buf + offset1, ldr.baseRegister, adrOffset);
-    // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
-    // not for AdrpLdrGotLdr. Its effect is the same either way.
-    writeNop(buf + offset2);
+  if (applyAdrpAdd(buf, isec, offset1, offset2))
     return;
-  }
 
   // Move the target's page offset into the ldr's immediate offset.
   //   adrp x0, _foo@PAGE
@@ -575,12 +561,10 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
     // ldr  x3, [x2, #off]
 
     uint32_t ins1 = read32le(buf + offset1);
-    Adrp adrp;
-    if (!parseAdrp(ins1, adrp))
-      return;
     uint32_t ins3 = read32le(buf + offset3);
+    Adrp adrp;
     Ldr ldr3;
-    if (!parseLdr(ins3, ldr3))
+    if (!parseAdrp(ins1, adrp) || !parseLdr(ins3, ldr3))
       return;
 
     if (ldr2.baseRegister != adrp.destRegister)
@@ -607,33 +591,32 @@ static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
   }
 }
 
-static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
-  unsigned int n = 0;
-  uint64_t value = decodeULEB128(ptr, &n, end);
-  ptr += n;
-  return value;
-}
-
 template <typename Callback>
 static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
   std::array<uint64_t, 3> args;
 
-  for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
-    uint64_t type = readValue(p, end);
+  auto readNext = [&]() -> uint64_t {
+    unsigned int n = 0;
+    uint64_t value = decodeULEB128(data.data(), &n, data.end());
+    data = data.drop_front(n);
+    return value;
+  };
+
+  while (!data.empty()) {
+    uint64_t type = readNext();
     if (type == 0)
       break;
 
-    uint64_t argCount = readValue(p, end);
+    uint64_t argCount = readNext();
+    for (unsigned i = 0; i < argCount; ++i) {
+      uint64_t arg = readNext();
+      if (i < 3)
+        args[i] = arg;
+    }
     // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
-    if (argCount > 3) {
-      for (unsigned i = 0; i < argCount; ++i)
-        readValue(p, end);
+    if (argCount > 3)
       continue;
-    }
-
-    for (unsigned i = 0; i < argCount; ++i)
-      args[i] = readValue(p, end);
-    callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+    callback(type, ArrayRef(args.data(), argCount));
   }
 }
 

Copy link
Member

@BertalanD BertalanD left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

@ellishg ellishg merged commit b5588ce into llvm:main May 27, 2025
11 checks passed
@ellishg ellishg deleted the macho-loh-refactor-hints branch May 27, 2025 15:24
sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Jun 3, 2025
In `applyAdrpAddLdr()` we make a transformation that is identical to the
one in `applyAdrpAdd()`, so lets reuse that code. Also refactor
`forEachHint()` to use more `ArrayRef` and move around some lines for
consistancy.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants