From 441e5115224a00d4772ed899f59b3aab166f80de Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 15:30:41 -0800 Subject: [PATCH 01/32] AMDGPU: Update register class numbers in test (#167601) --- llvm/test/CodeGen/AMDGPU/limit-coalesce.mir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir index e5b68b48158da..a245c475638f2 100644 --- a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir +++ b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -17,7 +17,7 @@ body: | ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub1:sgpr_64 = COPY $sgpr17 ; CHECK-NEXT: [[COPY:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr16 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: INLINEASM &"; def $0", 0 /* attdialect */, 3407882 /* regdef:VReg_64 */, def %4 + ; CHECK-NEXT: INLINEASM &"; def $0", 0 /* attdialect */, 2818058 /* regdef:VReg_64 */, def %4 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:vreg_128 = COPY %4.sub1 ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -26,7 +26,7 @@ body: | undef %2.sub0:sgpr_64 = COPY killed %1 %2.sub1:sgpr_64 = COPY killed %0 %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - INLINEASM &"; def $0", 0 /* attdialect */, 3407882 /* regdef:VReg_64 */, def %4:vreg_64 + INLINEASM &"; def $0", 0 /* attdialect */, 2818058 /* regdef:VReg_64 */, def %4:vreg_64 undef %5.sub0:vreg_128 = COPY killed %4.sub1 GLOBAL_STORE_DWORDX4_SADDR killed %3, killed %5, killed %2, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN From e3a9ac5e24d08cef3160fe3e242a4afe1b6d95a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 15:31:52 -0800 Subject: [PATCH 02/32] AMDGPU: Remove wrapper around TRI::getRegClass (#159885) This shadows the member in the base class, but differs slightly in behavior. The base method doesn't check for the invalid case. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 7 ++++--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 +++-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 11 ----------- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 2 -- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 293005c759e53..2c00e23d113cb 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1284,10 +1284,11 @@ void SIFoldOperandsImpl::foldOperand( continue; const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1; - const TargetRegisterClass *MovSrcRC = - TRI->getRegClass(TII->getOpRegClassID(MovDesc.operands()[SrcIdx])); - if (MovSrcRC) { + int16_t RegClassID = TII->getOpRegClassID(MovDesc.operands()[SrcIdx]); + if (RegClassID != -1) { + const TargetRegisterClass *MovSrcRC = TRI->getRegClass(RegClassID); + if (UseSubReg) MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e5f0e3e631988..4c4625b8834ee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6032,7 +6032,7 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, return nullptr; const MCOperandInfo &OpInfo = TID.operands()[OpNum]; int16_t RegClass = getOpRegClassID(OpInfo); - return RI.getRegClass(RegClass); + return RegClass < 0 ? 
nullptr : RI.getRegClass(RegClass); } const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, @@ -6050,7 +6050,8 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, return RI.getPhysRegBaseClass(Reg); } - return RI.getRegClass(getOpRegClassID(Desc.operands()[OpNo])); + int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]); + return RegClass < 0 ? nullptr : RI.getRegClass(RegClass); } void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8fba74831811f..ad79bdf3190f0 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -3908,17 +3908,6 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const { : &AMDGPU::VReg_64RegClass; } -// FIXME: This should be deleted -const TargetRegisterClass * -SIRegisterInfo::getRegClass(unsigned RCID) const { - switch ((int)RCID) { - case -1: - return nullptr; - default: - return AMDGPUGenRegisterInfo::getRegClass(RCID); - } -} - // Find reaching register definition MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 7b91ba7bc581f..813f6bb1a503a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -391,8 +391,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { MCRegister getExec() const; - const TargetRegisterClass *getRegClass(unsigned RCID) const; - // Find reaching register definition MachineInstr *findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, From a664f584f9596bf61aa9be35967dc578e58f6ca3 Mon Sep 17 00:00:00 2001 From: Hanumanth Date: Tue, 11 Nov 2025 18:36:41 -0500 Subject: [PATCH 03/32] [mlir][memref] Fix runtime verification for memref.subview for empty memref subviews (#166581) This PR applies the same fix from #166569 to `memref.subview`. That PR fixed the issue for `tensor.extract_slice`, and this one addresses the identical problem for `memref.subview`. The runtime verification for `memref.subview` incorrectly rejects valid empty subviews (size=0) starting at the memref boundary. **Example that demonstrates the issue:** ```mlir func.func @subview_with_empty_slice(%memref: memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, %dim_0: index, %dim_1: index, %dim_2: index, %offset: index) { // When called with: offset=10, dim_0=0, dim_1=4, dim_2=1 // Runtime verification fails: "offset 0 is out-of-bounds" %subview = memref.subview %memref[%offset, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>> to memref> return } ``` When `%offset=10` and `%dim_0=0`, we're creating an empty subview (zero elements along dimension 0) starting at the boundary. The current verification enforces `offset < dim_size`, which evaluates to `10 < 10` and fails. I feel this should be valid since no memory is accessed. **The fix:** Same as #166569 - make the offset check conditional on subview size: - Empty subview (size == 0): allow `0 <= offset <= dim_size` - Non-empty subview (size > 0): require `0 <= offset < dim_size` Please see #166569 for motivation and rationale. 
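As a quick reference, the per-dimension condition the pass now asserts can be summarized in plain Python (an illustrative sketch of the rule described above, not the actual implementation, which builds `arith`/`scf` ops as shown in the diff below):

```python
def subview_dim_in_bounds(offset: int, size: int, stride: int, dim_size: int) -> bool:
    """Per-dimension runtime check, sketched from the rules stated above."""
    if size == 0:
        # Empty subview: no element is accessed, so the offset may sit on the
        # boundary: 0 <= offset <= dim_size.
        return 0 <= offset <= dim_size
    # Non-empty subview: the offset and the last accessed position must both be
    # valid indices: 0 <= x < dim_size.
    last = offset + (size - 1) * stride
    return 0 <= offset < dim_size and 0 <= last < dim_size
```

For the example above (`offset=10`, `size=0`, `dim_size=10`) this evaluates to true, so the empty subview at the boundary is accepted.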
--- Co-authored-by: Hanumanth Hanumantharayappa --- .../Transforms/RuntimeOpVerification.cpp | 89 ++++++++++++------- .../MemRef/subview-runtime-verification.mlir | 15 ++++ 2 files changed, 70 insertions(+), 34 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp index 14152c5a1af0c..e5cc41e2c43ba 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp @@ -268,61 +268,82 @@ struct SubViewOpInterface MemRefType sourceType = subView.getSource().getType(); // For each dimension, assert that: - // 0 <= offset < dim_size - // 0 <= offset + (size - 1) * stride < dim_size + // For empty slices (size == 0) : 0 <= offset <= dim_size + // For non-empty slices (size > 0): 0 <= offset < dim_size + // 0 <= offset + (size - 1) * stride + // dim_size Value zero = arith::ConstantIndexOp::create(builder, loc, 0); Value one = arith::ConstantIndexOp::create(builder, loc, 1); + auto metadataOp = ExtractStridedMetadataOp::create(builder, loc, subView.getSource()); + for (int64_t i : llvm::seq(0, sourceType.getRank())) { - // Reset insertion point to before the operation for each dimension + // Reset insertion point to before the operation for each dimension. builder.setInsertionPoint(subView); + Value offset = getValueOrCreateConstantIndexOp( builder, loc, subView.getMixedOffsets()[i]); Value size = getValueOrCreateConstantIndexOp(builder, loc, subView.getMixedSizes()[i]); Value stride = getValueOrCreateConstantIndexOp( builder, loc, subView.getMixedStrides()[i]); - - // Verify that offset is in-bounds. Value dimSize = metadataOp.getSizes()[i]; - Value offsetInBounds = - generateInBoundsCheck(builder, loc, offset, zero, dimSize); - cf::AssertOp::create(builder, loc, offsetInBounds, + + // Verify that offset is in-bounds (conditional on slice size). + Value sizeIsZero = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::eq, size, zero); + auto offsetCheckIf = scf::IfOp::create( + builder, loc, sizeIsZero, + [&](OpBuilder &b, Location loc) { + // For empty slices, offset can be at the boundary: 0 <= offset <= + // dimSize. + Value offsetGEZero = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sge, offset, zero); + Value offsetLEDimSize = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sle, offset, dimSize); + Value emptyOffsetValid = + arith::AndIOp::create(b, loc, offsetGEZero, offsetLEDimSize); + scf::YieldOp::create(b, loc, emptyOffsetValid); + }, + [&](OpBuilder &b, Location loc) { + // For non-empty slices, offset must be a valid index: 0 <= offset + // dimSize. + Value offsetInBounds = + generateInBoundsCheck(b, loc, offset, zero, dimSize); + scf::YieldOp::create(b, loc, offsetInBounds); + }); + + Value offsetCondition = offsetCheckIf.getResult(0); + cf::AssertOp::create(builder, loc, offsetCondition, generateErrorMessage(op, "offset " + std::to_string(i) + " is out-of-bounds")); - // Only verify if size > 0 + // Verify that the slice endpoint is in-bounds (only for non-empty + // slices). Value sizeIsNonZero = arith::CmpIOp::create( builder, loc, arith::CmpIPredicate::sgt, size, zero); + auto ifOp = scf::IfOp::create( + builder, loc, sizeIsNonZero, + [&](OpBuilder &b, Location loc) { + // Verify that slice does not run out-of-bounds. 
+ Value sizeMinusOne = arith::SubIOp::create(b, loc, size, one); + Value sizeMinusOneTimesStride = + arith::MulIOp::create(b, loc, sizeMinusOne, stride); + Value lastPos = + arith::AddIOp::create(b, loc, offset, sizeMinusOneTimesStride); + Value lastPosInBounds = + generateInBoundsCheck(b, loc, lastPos, zero, dimSize); + scf::YieldOp::create(b, loc, lastPosInBounds); + }, + [&](OpBuilder &b, Location loc) { + Value trueVal = + arith::ConstantOp::create(b, loc, b.getBoolAttr(true)); + scf::YieldOp::create(b, loc, trueVal); + }); - auto ifOp = scf::IfOp::create(builder, loc, builder.getI1Type(), - sizeIsNonZero, /*withElseRegion=*/true); - - // Populate the "then" region (for size > 0). - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - - // Verify that slice does not run out-of-bounds. - Value sizeMinusOne = arith::SubIOp::create(builder, loc, size, one); - Value sizeMinusOneTimesStride = - arith::MulIOp::create(builder, loc, sizeMinusOne, stride); - Value lastPos = - arith::AddIOp::create(builder, loc, offset, sizeMinusOneTimesStride); - Value lastPosInBounds = - generateInBoundsCheck(builder, loc, lastPos, zero, dimSize); - - scf::YieldOp::create(builder, loc, lastPosInBounds); - - // Populate the "else" region (for size == 0). - builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - Value trueVal = - arith::ConstantOp::create(builder, loc, builder.getBoolAttr(true)); - scf::YieldOp::create(builder, loc, trueVal); - - builder.setInsertionPointAfter(ifOp); Value finalCondition = ifOp.getResult(0); - cf::AssertOp::create( builder, loc, finalCondition, generateErrorMessage(op, diff --git a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir index 84875675ac3d0..09cfee16ccd00 100644 --- a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir @@ -50,6 +50,17 @@ func.func @subview_zero_size_dim(%memref: memref<10x4x1xf32, strided<[?, ?, ?], return } +func.func @subview_with_empty_slice(%memref: memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, + %dim_0: index, + %dim_1: index, + %dim_2: index, + %offset: index) { + %subview = memref.subview %memref[%offset, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : + memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>> to + memref> + return +} + func.func @main() { %0 = arith.constant 0 : index @@ -127,5 +138,9 @@ func.func @main() { func.call @subview_zero_size_dim(%alloca_10x4x1_dyn_stride, %dim_0, %dim_1, %dim_2) : (memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, index, index, index) -> () + // CHECK-NOT: ERROR: Runtime op verification failed + %offset = arith.constant 10 : index + func.call @subview_with_empty_slice(%alloca_10x4x1_dyn_stride, %dim_0, %dim_1, %dim_2, %offset) + : (memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, index, index, index, index) -> () return } From 81964597f9918e1f294f5b9de27ee662005b8c58 Mon Sep 17 00:00:00 2001 From: Hanumanth Date: Tue, 11 Nov 2025 18:37:15 -0500 Subject: [PATCH 04/32] [mlir][tensor] Fix runtime verification for tensor.extract_slice for empty tensor slices (#166569) I hit another runtime verification issue (similar to https://github.com/llvm/llvm-project/pull/164878) while working with TFLite models. The verifier is incorrectly rejecting `tensor.extract_slice` operations when extracting an empty slice (size=0) that starts exactly at the tensor boundary. 
The current runtime verification unconditionally enforces `offset < dim_size`. This makes sense for non-empty slices, but it's too strict for empty slices, causing false positives that lead to spurious runtime assertions. **Simple example that demonstrates the issue:** ```mlir func.func @extract_empty_slice(%tensor: tensor, %offset: index, %size: index) { // When called with: tensor size=10, offset=10, size=0 // Runtime verification fails: "offset 0 is out-of-bounds" %slice = tensor.extract_slice %tensor[%offset] [%size] [1] : tensor to tensor return } ``` For the above example, the check evaluates `10 < 10` which is false, so verification fails. However, I believe this operation should be valid - we're extracting zero elements, so there's no actual out-of-bounds access. **Real-world repro from the TensorFlow Lite models:** This issue manifests while lowering TFLite models and a lot of our system tests are failing due to this. Here's a simplified version showing the problematic pattern: In this code, `%extracted_slice_0` becomes an empty tensor when SSA value `%15` reaches 10 (on the final loop iteration), making `%16 = 0`. The operation extracts zero elements along dimension 0, which is semantically valid but fails runtime verification. ```mlir func.func @simplified_repro_from_tensorflowlite_model(%arg0: tensor<10x4x1xf32>) -> tensor<10x4x1xf32> { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index %c10 = arith.constant 10 : index %c-1 = arith.constant -1 : index %0 = "tosa.const"() <{values = dense<0> : tensor}> : () -> tensor %1 = "tosa.const"() <{values = dense<1> : tensor}> : () -> tensor %2 = "tosa.const"() <{values = dense<10> : tensor}> : () -> tensor %3 = "tosa.const"() <{values = dense<-1> : tensor<2xi32>}> : () -> tensor<2xi32> %4 = "tosa.const"() <{values = dense<0> : tensor<2xi32>}> : () -> tensor<2xi32> %5 = "tosa.const"() <{values = dense<0.000000e+00> : tensor<1x4x1xf32>}> : () -> tensor<1x4x1xf32> %c4_1 = tosa.const_shape {values = dense<1> : tensor<1xindex>} : () -> !tosa.shape<1> %6:2 = scf.while (%arg1 = %0, %arg2 = %arg0) : (tensor, tensor<10x4x1xf32>) -> (tensor, tensor<10x4x1xf32>) { %7 = tosa.greater %2, %arg1 : (tensor, tensor) -> tensor %extracted = tensor.extract %7[] : tensor scf.condition(%extracted) %arg1, %arg2 : tensor, tensor<10x4x1xf32> } do { ^bb0(%arg1: tensor, %arg2: tensor<10x4x1xf32>): %7 = tosa.add %arg1, %1 : (tensor, tensor) -> tensor // First slice %8 = tosa.reshape %arg1, %c4_1 : (tensor, !tosa.shape<1>) -> tensor<1xi32> %9 = tosa.concat %8, %3 {axis = 0 : i32} : (tensor<1xi32>, tensor<2xi32>) -> tensor<3xi32> %extracted_0 = tensor.extract %9[%c0] : tensor<3xi32> %10 = index.casts %extracted_0 : i32 to index %11 = arith.cmpi eq, %10, %c-1 : index %12 = arith.select %11, %c10, %10 : index %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [%12, 4, 1] [1, 1, 1] : tensor<10x4x1xf32> to tensor // Second slice - this is where the failure occurs %13 = tosa.reshape %7, %c4_1 : (tensor, !tosa.shape<1>) -> tensor<1xi32> %14 = tosa.concat %13, %4 {axis = 0 : i32} : (tensor<1xi32>, tensor<2xi32>) -> tensor<3xi32> %extracted_1 = tensor.extract %14[%c0] : tensor<3xi32> %15 = index.castu %extracted_1 : i32 to index %16 = arith.subi %c10, %15 : index // size = 10 - offset %extracted_2 = tensor.extract %14[%c1] : tensor<3xi32> %17 = index.castu %extracted_2 : i32 to index %extracted_3 = tensor.extract %14[%c2] : tensor<3xi32> %18 = index.castu %extracted_3 : i32 to index // On the last loop iteration: %15=10, %16=0 // 
%extracted_slice_0 becomes an empty tensor // Runtime verification fails: "offset 0 is out-of-bounds" %extracted_slice_0 = tensor.extract_slice %arg2[%15, %17, %18] [%16, 4, 1] [1, 1, 1] : tensor<10x4x1xf32> to tensor %19 = tosa.concat %extracted_slice, %5, %extracted_slice_0 {axis = 0 : i32} : (tensor, tensor<1x4x1xf32>, tensor) -> tensor<10x4x1xf32> scf.yield %7, %19 : tensor, tensor<10x4x1xf32> } return %6#1 : tensor<10x4x1xf32> } ``` **The fix:** Make the offset check conditional on slice size: - Empty slice (size == 0): allow `0 <= offset <= dim_size` - Non-empty slice (size > 0): require `0 <= offset < dim_size` **Question for reviewers:** Should we also relax the static verifier to allow this edge case? Currently, the static verifier rejects the following IR: ```mlir %tensor = arith.constant dense<1.0> : tensor<10xf32> %slice = tensor.extract_slice %tensor[10] [0] [1] : tensor<10xf32> to tensor<0xf32> ``` Since we're allowing it at runtime for dynamic shapes, it seems inconsistent to reject it statically. However, I wanted to get feedback before making that change - this PR focuses only on the runtime verification fix for dynamic shapes. P.S. We have a similar issue with `memref.subview`. I will send a separate patch for the issue. Co-authored-by: Hanumanth Hanumantharayappa --- .../Transforms/RuntimeOpVerification.cpp | 85 ++++++++++++------- .../extract_slice-runtime-verification.mlir | 9 ++ 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp index 753cb95b1c906..d35f458cbdb36 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp @@ -155,13 +155,15 @@ struct ExtractSliceOpInterface RankedTensorType sourceType = extractSliceOp.getSource().getType(); // For each dimension, assert that: - // 0 <= offset < dim_size - // 0 <= offset + (size - 1) * stride < dim_size + // For empty slices (size == 0) : 0 <= offset <= dim_size + // For non-empty slices (size > 0): 0 <= offset < dim_size + // 0 <= offset + (size - 1) * stride < + // dim_size Value zero = arith::ConstantIndexOp::create(builder, loc, 0); Value one = arith::ConstantIndexOp::create(builder, loc, 1); for (int64_t i : llvm::seq(0, sourceType.getRank())) { - // Reset insertion point to before the operation for each dimension + builder.setInsertionPoint(extractSliceOp); Value offset = getValueOrCreateConstantIndexOp( @@ -170,46 +172,63 @@ struct ExtractSliceOpInterface builder, loc, extractSliceOp.getMixedSizes()[i]); Value stride = getValueOrCreateConstantIndexOp( builder, loc, extractSliceOp.getMixedStrides()[i]); - - // Verify that offset is in-bounds. Value dimSize = builder.createOrFold( loc, extractSliceOp.getSource(), i); - Value offsetInBounds = - generateInBoundsCheck(builder, loc, offset, zero, dimSize); - cf::AssertOp::create(builder, loc, offsetInBounds, + + // Verify that offset is in-bounds (conditional on slice size). + Value sizeIsZero = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::eq, size, zero); + auto offsetCheckIf = scf::IfOp::create( + builder, loc, sizeIsZero, + [&](OpBuilder &b, Location loc) { + // For empty slices, offset can be at the boundary: 0 <= offset <= + // dimSize. 
+ Value offsetGEZero = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sge, offset, zero); + Value offsetLEDimSize = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sle, offset, dimSize); + Value emptyOffsetValid = + arith::AndIOp::create(b, loc, offsetGEZero, offsetLEDimSize); + scf::YieldOp::create(b, loc, emptyOffsetValid); + }, + [&](OpBuilder &b, Location loc) { + // For non-empty slices, offset must be a valid index: 0 <= offset < + // dimSize. + Value offsetInBounds = + generateInBoundsCheck(b, loc, offset, zero, dimSize); + scf::YieldOp::create(b, loc, offsetInBounds); + }); + + Value offsetCondition = offsetCheckIf.getResult(0); + cf::AssertOp::create(builder, loc, offsetCondition, generateErrorMessage(op, "offset " + std::to_string(i) + " is out-of-bounds")); - // Only verify if size > 0 + // Verify that the slice endpoint is in-bounds (only for non-empty + // slices). Value sizeIsNonZero = arith::CmpIOp::create( builder, loc, arith::CmpIPredicate::sgt, size, zero); + auto ifOp = scf::IfOp::create( + builder, loc, sizeIsNonZero, + [&](OpBuilder &b, Location loc) { + // Verify that slice does not run out-of-bounds. + Value sizeMinusOne = arith::SubIOp::create(b, loc, size, one); + Value sizeMinusOneTimesStride = + arith::MulIOp::create(b, loc, sizeMinusOne, stride); + Value lastPos = + arith::AddIOp::create(b, loc, offset, sizeMinusOneTimesStride); + Value lastPosInBounds = + generateInBoundsCheck(b, loc, lastPos, zero, dimSize); + scf::YieldOp::create(b, loc, lastPosInBounds); + }, + [&](OpBuilder &b, Location loc) { + Value trueVal = + arith::ConstantOp::create(b, loc, b.getBoolAttr(true)); + scf::YieldOp::create(b, loc, trueVal); + }); - auto ifOp = scf::IfOp::create(builder, loc, builder.getI1Type(), - sizeIsNonZero, /*withElseRegion=*/true); - - // Populate the "then" region (for size > 0). - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - - // Verify that slice does not run out-of-bounds. - Value sizeMinusOne = arith::SubIOp::create(builder, loc, size, one); - Value sizeMinusOneTimesStride = - arith::MulIOp::create(builder, loc, sizeMinusOne, stride); - Value lastPos = - arith::AddIOp::create(builder, loc, offset, sizeMinusOneTimesStride); - Value lastPosInBounds = - generateInBoundsCheck(builder, loc, lastPos, zero, dimSize); - scf::YieldOp::create(builder, loc, lastPosInBounds); - - // Populate the "else" region (for size == 0). 
- builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - Value trueVal = - arith::ConstantOp::create(builder, loc, builder.getBoolAttr(true)); - scf::YieldOp::create(builder, loc, trueVal); - - builder.setInsertionPointAfter(ifOp); Value finalCondition = ifOp.getResult(0); - cf::AssertOp::create( builder, loc, finalCondition, generateErrorMessage( diff --git a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir index a77fa310a3699..745eea37f7fca 100644 --- a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir @@ -39,6 +39,11 @@ func.func @extract_slice_zero_size_dim(%arg0: tensor<10x4x1xf32>, %dim_0: index, return } +func.func @extract_slice_empty_tensor(%arg0: tensor<10x4x1xf32>, %dim_0: index, %dim_1: index, %dim_2: index, %offset: index) { + tensor.extract_slice %arg0[%offset, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : tensor<10x4x1xf32> to tensor + return +} + func.func @main() { %0 = arith.constant 0 : index @@ -115,5 +120,9 @@ func.func @main() { %dim_2 = arith.constant 1 : index func.call @extract_slice_zero_size_dim(%cst10x4x1xf32, %dim_0, %dim_1, %dim_2) : (tensor<10x4x1xf32>, index, index, index) -> () + // CHECK-NOT: ERROR: Runtime op verification failed + %offset = arith.constant 10 : index + func.call @extract_slice_empty_tensor(%cst10x4x1xf32, %dim_0, %dim_1, %dim_2, %offset) : (tensor<10x4x1xf32>, index, index, index, index) -> () + return } From b07f8b0d98d9679ef2af91dc2a8fa8d7155157e6 Mon Sep 17 00:00:00 2001 From: anoopkg6 Date: Tue, 11 Nov 2025 17:46:30 -0600 Subject: [PATCH 05/32] [JITLINK] Fix large offset issue (#167600) Removed large offset test. It caused issue with ARM 32-bit because of large offset. 
Co-authored-by: anoopkg6 --- .../JITLink/systemz/ELF_systemz_reloc_pcdbl.s | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s index efe8357e76bef..6a7ca8bd6e2a6 100644 --- a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s @@ -1,33 +1,34 @@ # REQUIRES: system-linux # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=0xffe -defsym OFF16=4 -defsym OFF24=6 \ -# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=0xffe -abs OFF16=4 -abs OFF24=6 \ -# RUN: -abs OFF32=6 -check=%s %t.o +# RUN: -check=%s %t.o # # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=6 -defsym OFF16=0xfffe -defsym OFF24=6 \ -# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=0xfffe -abs OFF24=6 \ -# RUN: -abs OFF32=6 -check=%s %t.o +# RUN: -check=%s %t.o # # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=6 -defsym OFF16=4 -defsym OFF24=0xfffffe \ -# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=4 -abs OFF24=0xfffffe \ -# RUN: -abs OFF32=6 -check=%s %t.o +# RUN: -check=%s %t.o # # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=6 -defsym OFF16=4 -defsym OFF24=6 \ -# RUN: -defsym OFF32=0xffffffc8 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=4 -abs OFF24=6 \ -# RUN: -abs OFF32=0xffffffc8 -check=%s %t.o +# RUN: -check=%s %t.o -# Check R_390_PC*dbl relocations. +# Check R_390_PC*dbl relocations. R_390_PC32_DBL test is in +# ELF_systemz_reloc_abs32.s because of large offset. .text .section .text.main @@ -49,17 +50,6 @@ test_pc16dbl: jne test_pc16dbl .size test_pc16dbl,.-test_pc16dbl -# R_390_PC32DBL -# jitlink-check: *{4}(test_pc32dbl + 2) = (OFF32 >> 1) - .globl test_pc32dbl - .p2align 3 -test_pc32dbl: - jge .Lpc32dbl - .space OFF32 - 6 -.Lpc32dbl: - jgne test_pc32dbl - .size test_pc32dbl,.-test_pc32dbl - # R_390_PC12DBL # jitlink-check: ((*{2} (test_pc12dbl + 1)) & 0x0fff) = (OFF12 >> 1) .globl test_pc12dbl From ce175995533c7407a00d72633f867bf93d4d4499 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Tue, 11 Nov 2025 15:49:45 -0800 Subject: [PATCH 06/32] [MLIR][Python] Add wrappers for scf.index_switch (#167458) The C++ index switch op has utilities for `getCaseBlock(int i)` and `getDefaultBlock()`, so these have been added. Optional body builder args have been added: one for the default case and one for the switch cases. 
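A minimal usage sketch of the new wrappers (mirroring the `testIndexSwitchWithBodyBuilders` test added below; it assumes an enclosing MLIR `Context`/`Module`/`InsertionPoint` as set up by the test harness):

```python
from mlir.dialects import arith, scf
from mlir.extras import types as T


def build_index_switch(index_value):
    i32 = T.i32()

    def default_body(switch_op):
        # Built inside the default region.
        scf.yield_([arith.constant(i32, 1)])

    def case_body(switch_op, case_index, case_value):
        # Built inside case region `case_index`; `case_value` is the matched index.
        scf.yield_([arith.constant(i32, case_value)])

    # scf.index_switch returns the op's result here; when constructing
    # IndexSwitchOp directly, its blocks stay reachable via default_block
    # and case_block(i).
    return scf.index_switch(
        results=[i32],
        arg=index_value,
        cases=range(3),
        case_body_builder=case_body,
        default_body_builder=default_body,
    )
```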
--- mlir/python/mlir/dialects/scf.py | 75 ++++++++++++++++++ mlir/test/python/dialects/scf.py | 126 ++++++++++++++++++++++++++++++- mlir/test/python/ir/operation.py | 4 +- 3 files changed, 198 insertions(+), 7 deletions(-) diff --git a/mlir/python/mlir/dialects/scf.py b/mlir/python/mlir/dialects/scf.py index 678ceeebac204..9e22df3dd50a9 100644 --- a/mlir/python/mlir/dialects/scf.py +++ b/mlir/python/mlir/dialects/scf.py @@ -12,6 +12,7 @@ from ._ods_common import ( get_op_result_or_value as _get_op_result_or_value, get_op_results_or_values as _get_op_results_or_values, + get_op_result_or_op_results as _get_op_result_or_op_results, _cext as _ods_cext, ) except ImportError as e: @@ -254,3 +255,77 @@ def for_( yield iv, iter_args[0], for_op.results[0] else: yield iv + + +@_ods_cext.register_operation(_Dialect, replace=True) +class IndexSwitchOp(IndexSwitchOp): + __doc__ = IndexSwitchOp.__doc__ + + def __init__( + self, + results, + arg, + cases, + case_body_builder=None, + default_body_builder=None, + loc=None, + ip=None, + ): + cases = DenseI64ArrayAttr.get(cases) + super().__init__( + results, arg, cases, num_caseRegions=len(cases), loc=loc, ip=ip + ) + for region in self.regions: + region.blocks.append() + + if default_body_builder is not None: + with InsertionPoint(self.default_block): + default_body_builder(self) + + if case_body_builder is not None: + for i, case in enumerate(cases): + with InsertionPoint(self.case_block(i)): + case_body_builder(self, i, self.cases[i]) + + @property + def default_region(self) -> Region: + return self.regions[0] + + @property + def default_block(self) -> Block: + return self.default_region.blocks[0] + + @property + def case_regions(self) -> Sequence[Region]: + return self.regions[1:] + + def case_region(self, i: int) -> Region: + return self.case_regions[i] + + @property + def case_blocks(self) -> Sequence[Block]: + return [region.blocks[0] for region in self.case_regions] + + def case_block(self, i: int) -> Block: + return self.case_regions[i].blocks[0] + + +def index_switch( + results, + arg, + cases, + case_body_builder=None, + default_body_builder=None, + loc=None, + ip=None, +) -> Union[OpResult, OpResultList, IndexSwitchOp]: + op = IndexSwitchOp( + results=results, + arg=arg, + cases=cases, + case_body_builder=case_body_builder, + default_body_builder=default_body_builder, + loc=loc, + ip=ip, + ) + return _get_op_result_or_op_results(op) diff --git a/mlir/test/python/dialects/scf.py b/mlir/test/python/dialects/scf.py index 62d11d5e189c8..0c0c9b986562b 100644 --- a/mlir/test/python/dialects/scf.py +++ b/mlir/test/python/dialects/scf.py @@ -1,10 +1,14 @@ # RUN: %PYTHON %s | FileCheck %s from mlir.ir import * -from mlir.dialects import arith -from mlir.dialects import func -from mlir.dialects import memref -from mlir.dialects import scf +from mlir.extras import types as T +from mlir.dialects import ( + arith, + func, + memref, + scf, + cf, +) from mlir.passmanager import PassManager @@ -355,3 +359,117 @@ def simple_if_else(cond): # CHECK: scf.yield %[[TWO]], %[[THREE]] # CHECK: arith.addi %[[RET]]#0, %[[RET]]#1 # CHECK: return + + +@constructAndPrintInModule +def testIndexSwitch(): + i32 = T.i32() + + @func.FuncOp.from_py_func(T.index(), results=[i32]) + def index_switch(index): + c1 = arith.constant(i32, 1) + c0 = arith.constant(i32, 0) + value = arith.constant(i32, 5) + switch_op = scf.IndexSwitchOp([i32], index, range(3)) + + assert switch_op.regions[0] == switch_op.default_region + assert switch_op.regions[1] == switch_op.case_regions[0] + assert 
switch_op.regions[1] == switch_op.case_region(0) + assert len(switch_op.case_regions) == 3 + assert len(switch_op.regions) == 4 + + with InsertionPoint(switch_op.default_block): + cf.assert_(arith.constant(T.bool(), 0), "Whoops!") + scf.yield_([c1]) + + for i, block in enumerate(switch_op.case_blocks): + with InsertionPoint(block): + scf.yield_([arith.constant(i32, i)]) + + func.return_([switch_op.results[0]]) + + return index_switch + + +# CHECK-LABEL: func.func @index_switch( +# CHECK-SAME: %[[ARG0:.*]]: index) -> i32 { +# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : i32 +# CHECK: %[[CONSTANT_1:.*]] = arith.constant 0 : i32 +# CHECK: %[[CONSTANT_2:.*]] = arith.constant 5 : i32 +# CHECK: %[[INDEX_SWITCH_0:.*]] = scf.index_switch %[[ARG0]] -> i32 +# CHECK: case 0 { +# CHECK: %[[CONSTANT_3:.*]] = arith.constant 0 : i32 +# CHECK: scf.yield %[[CONSTANT_3]] : i32 +# CHECK: } +# CHECK: case 1 { +# CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : i32 +# CHECK: scf.yield %[[CONSTANT_4]] : i32 +# CHECK: } +# CHECK: case 2 { +# CHECK: %[[CONSTANT_5:.*]] = arith.constant 2 : i32 +# CHECK: scf.yield %[[CONSTANT_5]] : i32 +# CHECK: } +# CHECK: default { +# CHECK: %[[CONSTANT_6:.*]] = arith.constant false +# CHECK: cf.assert %[[CONSTANT_6]], "Whoops!" +# CHECK: scf.yield %[[CONSTANT_0]] : i32 +# CHECK: } +# CHECK: return %[[INDEX_SWITCH_0]] : i32 +# CHECK: } + + +@constructAndPrintInModule +def testIndexSwitchWithBodyBuilders(): + i32 = T.i32() + + @func.FuncOp.from_py_func(T.index(), results=[i32]) + def index_switch(index): + c1 = arith.constant(i32, 1) + c0 = arith.constant(i32, 0) + value = arith.constant(i32, 5) + + def default_body_builder(switch_op): + cf.assert_(arith.constant(T.bool(), 0), "Whoops!") + scf.yield_([c1]) + + def case_body_builder(switch_op, case_index: int, case_value: int): + scf.yield_([arith.constant(i32, case_value)]) + + result = scf.index_switch( + results=[i32], + arg=index, + cases=range(3), + case_body_builder=case_body_builder, + default_body_builder=default_body_builder, + ) + + func.return_([result]) + + return index_switch + + +# CHECK-LABEL: func.func @index_switch( +# CHECK-SAME: %[[ARG0:.*]]: index) -> i32 { +# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : i32 +# CHECK: %[[CONSTANT_1:.*]] = arith.constant 0 : i32 +# CHECK: %[[CONSTANT_2:.*]] = arith.constant 5 : i32 +# CHECK: %[[INDEX_SWITCH_0:.*]] = scf.index_switch %[[ARG0]] -> i32 +# CHECK: case 0 { +# CHECK: %[[CONSTANT_3:.*]] = arith.constant 0 : i32 +# CHECK: scf.yield %[[CONSTANT_3]] : i32 +# CHECK: } +# CHECK: case 1 { +# CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : i32 +# CHECK: scf.yield %[[CONSTANT_4]] : i32 +# CHECK: } +# CHECK: case 2 { +# CHECK: %[[CONSTANT_5:.*]] = arith.constant 2 : i32 +# CHECK: scf.yield %[[CONSTANT_5]] : i32 +# CHECK: } +# CHECK: default { +# CHECK: %[[CONSTANT_6:.*]] = arith.constant false +# CHECK: cf.assert %[[CONSTANT_6]], "Whoops!" 
+# CHECK: scf.yield %[[CONSTANT_0]] : i32 +# CHECK: } +# CHECK: return %[[INDEX_SWITCH_0]] : i32 +# CHECK: } diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py index 1bdd345d98c05..66ba5d28e49b2 100644 --- a/mlir/test/python/ir/operation.py +++ b/mlir/test/python/ir/operation.py @@ -1212,9 +1212,7 @@ def testIndexSwitch(): @func.FuncOp.from_py_func(T.index()) def index_switch(index): c1 = arith.constant(i32, 1) - switch_op = scf.IndexSwitchOp( - results_=[i32], arg=index, cases=range(3), num_caseRegions=3 - ) + switch_op = scf.IndexSwitchOp(results=[i32], arg=index, cases=range(3)) assert len(switch_op.regions) == 4 assert len(switch_op.regions[2:]) == 2 From 95dfe79cac47f2a4280b5b67e193c94c412ae600 Mon Sep 17 00:00:00 2001 From: Prabhu Rajasekaran Date: Tue, 11 Nov 2025 16:04:39 -0800 Subject: [PATCH 07/32] [MachO] Fix test failure. (#167598) Add requires to not run `invalid-section-index.s` test in non aarch64 supported environments. --- llvm/test/MC/MachO/invalid-section-index.s | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/test/MC/MachO/invalid-section-index.s b/llvm/test/MC/MachO/invalid-section-index.s index 55a0ce5b40ea7..104e8a82e43af 100644 --- a/llvm/test/MC/MachO/invalid-section-index.s +++ b/llvm/test/MC/MachO/invalid-section-index.s @@ -1,6 +1,8 @@ +// REQUIRES: aarch64-registered-target + /// Test that when there are more than 255 sections, error is shown specifying too many sections. -// RUN: not llvm-mc -filetype=obj -triple arm64-apple-macos %s -o - 2>&1 | FileCheck %s --check-prefix=MACHOERROR +// RUN: not llvm-mc -filetype=obj -triple arm64-apple-darwin %s -o - 2>&1 | FileCheck %s --check-prefix=MACHOERROR // MACHOERROR: error: Too many sections! // MACHOERROR-NEXT: error: Invalid section index! From fcba3040107944604904aeb146c26ec0628160f4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 16:05:05 -0800 Subject: [PATCH 08/32] AMDGPU: Remove override of TargetInstrInfo::getRegClass (#159886) This should not be overridable and the special case hacks have been replaced with RegClassByHwMode --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 12 ------------ llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 --- 2 files changed, 15 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4c4625b8834ee..6e39a2de9b805 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6023,18 +6023,6 @@ SIInstrInfo::getWholeWaveFunctionSetup(MachineFunction &MF) const { llvm_unreachable("Couldn't find SI_SETUP_WHOLE_WAVE_FUNC instruction"); } -// FIXME: This should not be an overridable function. All subtarget dependent -// operand modifications should go through isLookupRegClassByHwMode in the -// generic handling. -const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, - unsigned OpNum) const { - if (OpNum >= TID.getNumOperands()) - return nullptr; - const MCOperandInfo &OpInfo = TID.operands()[OpNum]; - int16_t RegClass = getOpRegClassID(OpInfo); - return RegClass < 0 ? 
nullptr : RI.getRegClass(RegClass); -} - const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, unsigned OpNo) const { const MCInstrDesc &Desc = get(MI.getOpcode()); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index c048b85b1e99a..2ecd94186e1e0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1619,9 +1619,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { /// Return true if this opcode should not be used by codegen. bool isAsmOnlyOpcode(int MCOp) const; - const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, - unsigned OpNum) const override; - void fixImplicitOperands(MachineInstr &MI) const; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, From 362119d03cb5156599825cd43ceff65b4dd54068 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 12 Nov 2025 00:11:10 +0000 Subject: [PATCH 09/32] [gn build] Port 5c3323a59fd2 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b36466b3fac8b..3ca6b7fa0565e 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1712,7 +1712,6 @@ if (current_toolchain == default_toolchain) { "sstream", "stack", "stdatomic.h", - "stdbool.h", "stddef.h", "stdexcept", "stdio.h", From 2acd65288696c854028d2e61482e0b884af44062 Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Tue, 11 Nov 2025 16:31:57 -0800 Subject: [PATCH 10/32] Fix lldb-dap non-leaf frame source resolution issue (#165944) Summary ------- While dogfooding lldb-dap, I observed that VSCode frequently displays certain stack frames as greyed out. Although these frames have valid debug information, double-clicking them shows disassembly instead of source code. However, running `bt` from the LLDB command line correctly displays source file and line information for these same frames, indicating this is an lldb-dap specific issue. Root Cause ---------- Investigation revealed that `DAP::ResolveSource()` incorrectly uses a frame's PC address directly to determine whether valid source line information exists. This approach works for leaf frames, but fails for non-leaf (caller) frames where the PC points to the return address immediately after a call instruction. This return address may fall into compiler-generated code with no associated line information, even though the actual call site has valid source location data. The correct approach is to use the symbol context's line entry, which LLDB resolves by effectively checking PC-1 for non-leaf frames, properly identifying the line information for the call instruction rather than the return address. Testing ------- Manually tested with VSCode debugging sessions on production workloads. Verified that non-leaf frames now correctly display source code instead of disassembly view. 
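For reference, the same distinction expressed with the lldb Python SB API (an illustrative sketch only; the actual fix is in the C++ code shown in the diff below):

```python
import lldb


def frame_has_source_info(frame: lldb.SBFrame) -> bool:
    # frame.GetLineEntry() resolves through the symbol context, which already
    # backs up to the call instruction (effectively PC-1) for non-leaf frames,
    # so it reflects the call site rather than the return address.
    line_entry = frame.GetLineEntry()
    return line_entry.IsValid() and line_entry.GetLine() != 0


# The old behavior effectively asked the raw PC for a line entry, which for a
# non-leaf frame can land in compiler-generated code with no line info:
#   frame.GetPCAddress().GetLineEntry()  # may be invalid or line 0 here
```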
Before the change symptom: image And here is after the fix: image --------- Co-authored-by: Jeffrey Tan --- .../stackTraceCompilerGeneratedCode/Makefile | 3 + ...TestDAP_stackTraceCompilerGeneratedCode.py | 66 +++++++++++++++++++ .../stackTraceCompilerGeneratedCode/main.c | 19 ++++++ lldb/tools/lldb-dap/DAP.cpp | 12 ++-- lldb/tools/lldb-dap/ProtocolUtils.cpp | 7 +- lldb/tools/lldb-dap/ProtocolUtils.h | 3 +- 6 files changed, 100 insertions(+), 10 deletions(-) create mode 100644 lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile create mode 100644 lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py create mode 100644 lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c diff --git a/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py new file mode 100644 index 0000000000000..4ddf92402ad8a --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py @@ -0,0 +1,66 @@ +""" +Test lldb-dap stackTrace request for compiler generated code +""" + +import os + +import lldbdap_testcase +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class TestDAP_stackTraceCompilerGeneratedCode(lldbdap_testcase.DAPTestCaseBase): + def test_non_leaf_frame_compiler_generate_code(self): + """ + Test that non-leaf frames with compiler-generated code are properly resolved. + + This test verifies that LLDB correctly handles stack frames containing + compiler-generated code (code without valid source location information). + When a non-leaf frame contains compiler-generated code immediately after a + call instruction, LLDB should resolve the frame's source location to the + call instruction's line, rather than to the compiler-generated code that + follows, which lacks proper symbolication information. 
+ """ + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.c" + + # Set breakpoint inside bar() function + lines = [line_number(source, "// breakpoint here")] + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.assertEqual( + len(breakpoint_ids), len(lines), "expect correct number of breakpoints" + ) + + self.continue_to_breakpoints(breakpoint_ids) + + # Get the stack frames: [0] = bar(), [1] = foo(), [2] = main() + stack_frames = self.get_stackFrames() + self.assertGreater(len(stack_frames), 2, "Expected more than 2 stack frames") + + # Examine the foo() frame (stack_frames[1]) + # This is the critical frame containing compiler-generated code + foo_frame = stack_frames[1] + + # Verify that the frame's line number points to the bar() call, + # not to the compiler-generated code after it + foo_call_bar_source_line = foo_frame.get("line") + self.assertEqual( + foo_call_bar_source_line, + line_number(source, "foo call bar"), + "Expected foo call bar to be the source line of the frame", + ) + + # Verify the source file name is correctly resolved + foo_source_name = foo_frame.get("source", {}).get("name") + self.assertEqual( + foo_source_name, "main.c", "Expected foo source name to be main.c" + ) + + # When lldb fails to symbolicate a frame it will emit a fake assembly + # source with path of format ` or `
with + # sourceReference to retrieve disassembly source file. + # Verify that this didn't happen - the path should be a real file path. + foo_path = foo_frame.get("source", {}).get("path") + self.assertNotIn("`", foo_path, "Expected foo source path to not contain `") + self.continue_to_exit() diff --git a/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c new file mode 100644 index 0000000000000..dd3fcc295d492 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c @@ -0,0 +1,19 @@ +void bar() { + int val = 32; // breakpoint here +} + +void at_line_zero() {} + +int foo(); + +int main(int argc, char const *argv[]) { + foo(); + return 0; +} + +int foo() { + bar(); // foo call bar +#line 0 "test.cpp" + at_line_zero(); + return 0; +} diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index f009a902f79e7..11aed33886edb 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -657,18 +657,20 @@ std::optional DAP::ResolveSource(const lldb::SBFrame &frame) { if (!frame.IsValid()) return std::nullopt; - const lldb::SBAddress frame_pc = frame.GetPCAddress(); - if (DisplayAssemblySource(debugger, frame_pc)) + const lldb::SBLineEntry frame_line_entry = frame.GetLineEntry(); + if (DisplayAssemblySource(debugger, frame_line_entry)) { + const lldb::SBAddress frame_pc = frame.GetPCAddress(); return ResolveAssemblySource(frame_pc); + } - return CreateSource(frame.GetLineEntry().GetFileSpec()); + return CreateSource(frame_line_entry.GetFileSpec()); } std::optional DAP::ResolveSource(lldb::SBAddress address) { - if (DisplayAssemblySource(debugger, address)) + lldb::SBLineEntry line_entry = GetLineEntryForAddress(target, address); + if (DisplayAssemblySource(debugger, line_entry)) return ResolveAssemblySource(address); - lldb::SBLineEntry line_entry = GetLineEntryForAddress(target, address); if (!line_entry.IsValid()) return std::nullopt; diff --git a/lldb/tools/lldb-dap/ProtocolUtils.cpp b/lldb/tools/lldb-dap/ProtocolUtils.cpp index 868c67ca72986..acf31b03f7af0 100644 --- a/lldb/tools/lldb-dap/ProtocolUtils.cpp +++ b/lldb/tools/lldb-dap/ProtocolUtils.cpp @@ -27,7 +27,7 @@ using namespace lldb_dap::protocol; namespace lldb_dap { static bool ShouldDisplayAssemblySource( - lldb::SBAddress address, + lldb::SBLineEntry line_entry, lldb::StopDisassemblyType stop_disassembly_display) { if (stop_disassembly_display == lldb::eStopDisassemblyTypeNever) return false; @@ -37,7 +37,6 @@ static bool ShouldDisplayAssemblySource( // A line entry of 0 indicates the line is compiler generated i.e. no source // file is associated with the frame. 
- auto line_entry = address.GetLineEntry(); auto file_spec = line_entry.GetFileSpec(); if (!file_spec.IsValid() || line_entry.GetLine() == 0 || line_entry.GetLine() == LLDB_INVALID_LINE_NUMBER) @@ -174,10 +173,10 @@ bool IsAssemblySource(const protocol::Source &source) { } bool DisplayAssemblySource(lldb::SBDebugger &debugger, - lldb::SBAddress address) { + lldb::SBLineEntry line_entry) { const lldb::StopDisassemblyType stop_disassembly_display = GetStopDisassemblyDisplay(debugger); - return ShouldDisplayAssemblySource(address, stop_disassembly_display); + return ShouldDisplayAssemblySource(line_entry, stop_disassembly_display); } std::string GetLoadAddressString(const lldb::addr_t addr) { diff --git a/lldb/tools/lldb-dap/ProtocolUtils.h b/lldb/tools/lldb-dap/ProtocolUtils.h index a1f7ae0661914..f4d576ba9f608 100644 --- a/lldb/tools/lldb-dap/ProtocolUtils.h +++ b/lldb/tools/lldb-dap/ProtocolUtils.h @@ -53,7 +53,8 @@ std::optional CreateSource(const lldb::SBFileSpec &file); /// Checks if the given source is for assembly code. bool IsAssemblySource(const protocol::Source &source); -bool DisplayAssemblySource(lldb::SBDebugger &debugger, lldb::SBAddress address); +bool DisplayAssemblySource(lldb::SBDebugger &debugger, + lldb::SBLineEntry line_entry); /// Get the address as a 16-digit hex string, e.g. "0x0000000000012345" std::string GetLoadAddressString(const lldb::addr_t addr); From 79d9ae7a777a03452991d222642ffdb6687d9210 Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Tue, 11 Nov 2025 16:37:42 -0800 Subject: [PATCH 11/32] [AMDGPU][GISel] Add RegBankLegalize support for G_AMDGPU_WAVE_ADDRESS (#167456) --- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 2 ++ .../GlobalISel/regbankselect-amdgpu-wave-address.mir | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 615b911a22903..90114e44f1a48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -931,6 +931,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniP4}, {{SgprP4}, {}}}) .Any({{UniP8}, {{SgprP8}, {}}}); + addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}}); + bool hasSALUFloat = ST->hasSALUFloatInsts(); addRulesForGOpcs({G_FADD}, Standard) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir index f372c1f81948f..59716a250ff59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -regbankselect-greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -regbankselect-fast -o - %s | FileCheck %s # TODO: We could use scalar --- @@ -25,8 +25,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS 
$sgpr32 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY [[AMDGPU_WAVE_ADDRESS]](p5) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p1) :: (store (p5), addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](p5), [[DEF]](p1) :: (store (p5), addrspace 1) %0:_(p1) = G_IMPLICIT_DEF %1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %1, %0 :: (store (p5), addrspace 1) From ea10026b64f66b3b69c0545db20f9daa8579f5cb Mon Sep 17 00:00:00 2001 From: hev Date: Wed, 12 Nov 2025 08:51:08 +0800 Subject: [PATCH 12/32] Reland "[LoongArch] Add `isSafeToMove` hook to prevent unsafe instruction motion" (#167465) This patch introduces a new virtual method `TargetInstrInfo::isSafeToMove()` to allow backends to control whether a machine instruction can be safely moved by optimization passes. The `BranchFolder` pass now respects this hook when hoisting common code. By default, all instructions are considered safe to to move. For LoongArch, `isSafeToMove()` is overridden to prevent relocation-related instruction sequences (e.g. PC-relative addressing and calls) from being broken by instruction motion. Correspondingly, `isSchedulingBoundary()` is updated to reuse this logic for consistency. Relands #163725 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 11 ++++ llvm/lib/CodeGen/BranchFolding.cpp | 5 ++ .../Target/LoongArch/LoongArchInstrInfo.cpp | 45 +++++++++------ .../lib/Target/LoongArch/LoongArchInstrInfo.h | 3 + llvm/test/CodeGen/LoongArch/issue163681.ll | 56 +++++++++++++++++++ 5 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/issue163681.ll diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 43f28ed79f9dd..18142c2c0adf3 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1765,6 +1765,17 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { return true; } + /// Return true if it's safe to move a machine instruction. + /// This allows the backend to prevent certain special instruction + /// sequences from being broken by instruction motion in optimization + /// passes. + /// By default, this returns true for every instruction. + virtual bool isSafeToMove(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + return true; + } + /// Test if the given instruction should be considered a scheduling boundary. /// This primarily includes labels and terminators. virtual bool isSchedulingBoundary(const MachineInstr &MI, diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 7292bc2be0df2..0b212fb0beb20 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1979,6 +1979,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MachineBasicBlock::iterator FIB = FBB->begin(); MachineBasicBlock::iterator TIE = TBB->end(); MachineBasicBlock::iterator FIE = FBB->end(); + MachineFunction &MF = *TBB->getParent(); while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. TIB = skipDebugInstructionsForward(TIB, TIE, false); @@ -1993,6 +1994,10 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { // Hard to reason about register liveness with predicated instruction. break; + if (!TII->isSafeToMove(*TIB, TBB, MF)) + // Don't hoist the instruction if it isn't safe to move. 
+ break; + bool IsSafe = true; for (MachineOperand &MO : TIB->operands()) { // Don't attempt to hoist instructions with register masks. diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 9a33dccd002c7..9fc862af7ea24 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -378,12 +378,9 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } } -bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const { - if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) - return true; - +bool LoongArchInstrInfo::isSafeToMove(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { auto MII = MI.getIterator(); auto MIE = MBB->end(); @@ -429,25 +426,25 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO2 = Lu32I->getOperand(2).getTargetFlags(); if (MO0 == LoongArchII::MO_PCREL_HI && MO1 == LoongArchII::MO_PCREL_LO && MO2 == LoongArchII::MO_PCREL64_LO) - return true; + return false; if ((MO0 == LoongArchII::MO_GOT_PC_HI || MO0 == LoongArchII::MO_LD_PC_HI || MO0 == LoongArchII::MO_GD_PC_HI) && MO1 == LoongArchII::MO_GOT_PC_LO && MO2 == LoongArchII::MO_GOT_PC64_LO) - return true; + return false; if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO && MO2 == LoongArchII::MO_IE_PC64_LO) - return true; + return false; if (MO0 == LoongArchII::MO_DESC_PC_HI && MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC64_PC_LO) - return true; + return false; break; } case LoongArch::LU52I_D: { auto MO = MI.getOperand(2).getTargetFlags(); if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI || MO == LoongArchII::MO_IE_PC64_HI || MO == LoongArchII::MO_DESC64_PC_HI) - return true; + return false; break; } default: @@ -487,7 +484,7 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2)); auto MO2 = LoongArchII::getDirectFlags(Ld->getOperand(2)); if (MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC_LD) - return true; + return false; break; } if (SecondOp == MIE || @@ -496,34 +493,34 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2)); if (MO0 == LoongArchII::MO_PCREL_HI && SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_PCREL_LO) - return true; + return false; if (MO0 == LoongArchII::MO_GOT_PC_HI && SecondOp->getOpcode() == LdOp && MO1 == LoongArchII::MO_GOT_PC_LO) - return true; + return false; if ((MO0 == LoongArchII::MO_LD_PC_HI || MO0 == LoongArchII::MO_GD_PC_HI) && SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::ADDI_W: case LoongArch::ADDI_D: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_PCREL_LO || MO == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::LD_W: case LoongArch::LD_D: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::PseudoDESC_CALL: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_DESC_CALL) - return true; + return false; break; } default: @@ -531,6 +528,18 @@ bool 
LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, } } + return true; +} + +bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) + return true; + + if (!isSafeToMove(MI, MBB, MF)) + return true; + return false; } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index 796ef9f3a5715..9f7a0a2239a87 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -66,6 +66,9 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; + bool isSafeToMove(const MachineInstr &MI, const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; diff --git a/llvm/test/CodeGen/LoongArch/issue163681.ll b/llvm/test/CodeGen/LoongArch/issue163681.ll new file mode 100644 index 0000000000000..f6df349253045 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/issue163681.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch64 -code-model=large --verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +@.str = external constant [1 x i8] + +define void @caller(ptr %0) { +; CHECK-LABEL: caller: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ld.w $a2, $zero, 0 +; CHECK-NEXT: ld.d $a1, $a0, 0 +; CHECK-NEXT: beqz $a2, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str) +; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str) +; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str) +; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str) +; CHECK-NEXT: ldx.d $a2, $a2, $a0 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: jirl $ra, $zero, 0 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str) +; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str) +; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str) +; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str) +; CHECK-NEXT: ldx.d $a2, $a2, $a0 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: move $a3, $zero +; CHECK-NEXT: jirl $ra, $zero, 0 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: st.d $zero, $zero, 0 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %2 = load i32, ptr null, align 4 + %3 = icmp eq i32 %2, 0 + %4 = load i64, ptr %0, align 8 + br i1 %3, label %6, label %5 + +5: ; preds = %1 + call void null(ptr null, i64 %4, ptr @.str) + br label %7 + +6: ; preds = %1 + tail call void null(ptr null, i64 %4, ptr @.str, i32 0) + br label %7 + +7: ; preds = %6, %5 + store ptr null, ptr null, align 8 + ret void +} From 6655681cd0554f8df91bb0f7631b882f5bb13b81 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 11 Nov 2025 19:44:20 -0600 Subject: [PATCH 13/32] [llvm-offload-wrapper] Fix Triple and OpenMP handling (#167580) Summary: The OpenMP handling using an offload binary should be optional, it's only used for extra metadata for llvm-objdump. Also the triple was completely wrong, it didn't let anyone correctly choose between ELF and COFF handling. 
--- .../Frontend/Offloading/OffloadWrapper.cpp | 28 ++++--- llvm/test/Other/offload-wrapper.ll | 52 ------------ .../llvm-offload-wrapper/offload-wrapper.ll | 81 +++++++++++++++++++ .../llvm-offload-wrapper.cpp | 2 +- 4 files changed, 99 insertions(+), 64 deletions(-) delete mode 100644 llvm/test/Other/offload-wrapper.ll create mode 100644 llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp index 45818deda8aa6..86060d1d2b0b3 100644 --- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp @@ -147,21 +147,27 @@ GlobalVariable *createBinDesc(Module &M, ArrayRef> Bufs, Image->setAlignment(Align(object::OffloadBinary::getAlignment())); StringRef Binary(Buf.data(), Buf.size()); - assert(identify_magic(Binary) == file_magic::offload_binary && - "Invalid binary format"); + uint64_t BeginOffset = 0; + uint64_t EndOffset = Binary.size(); + + // Optionally use an offload binary for its offload dumping support. // The device image struct contains the pointer to the beginning and end of // the image stored inside of the offload binary. There should only be one // of these for each buffer so we parse it out manually. - const auto *Header = - reinterpret_cast( - Binary.bytes_begin()); - const auto *Entry = reinterpret_cast( - Binary.bytes_begin() + Header->EntryOffset); - - auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset); - auto *Size = - ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize); + if (identify_magic(Binary) == file_magic::offload_binary) { + const auto *Header = + reinterpret_cast( + Binary.bytes_begin()); + const auto *Entry = + reinterpret_cast( + Binary.bytes_begin() + Header->EntryOffset); + BeginOffset = Entry->ImageOffset; + EndOffset = Entry->ImageOffset + Entry->ImageSize; + } + + auto *Begin = ConstantInt::get(getSizeTTy(M), BeginOffset); + auto *Size = ConstantInt::get(getSizeTTy(M), EndOffset); Constant *ZeroBegin[] = {Zero, Begin}; Constant *ZeroSize[] = {Zero, Size}; diff --git a/llvm/test/Other/offload-wrapper.ll b/llvm/test/Other/offload-wrapper.ll deleted file mode 100644 index 9107a141ad201..0000000000000 --- a/llvm/test/Other/offload-wrapper.ll +++ /dev/null @@ -1,52 +0,0 @@ -; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc -; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP - -; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA" -; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ" -; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin" -; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 -; HIP-NEXT: @.hip.binary_handle = internal global ptr null -; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] - -; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { -; HIP-NEXT: entry: -; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) -; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 -; HIP-NEXT: call void @.hip.globals_reg(ptr %0) -; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg) -; HIP-NEXT: ret void -; HIP-NEXT: } - -; HIP: 
define internal void @.hip.fatbin_unreg() section ".text.startup" { -; HIP-NEXT: entry: -; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 -; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0) -; HIP-NEXT: ret void -; HIP-NEXT: } - -; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc -; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA - -; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA" -; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ" -; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin" -; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 -; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null -; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }] - -; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" { -; CUDA-NEXT: entry: -; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper) -; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8 -; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0) -; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0) -; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg) -; CUDA-NEXT: ret void -; CUDA-NEXT: } - -; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" { -; CUDA-NEXT: entry: -; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8 -; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0) -; CUDA-NEXT: ret void -; CUDA-NEXT: } diff --git a/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll new file mode 100644 index 0000000000000..32aad0b6cf64e --- /dev/null +++ b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll @@ -0,0 +1,81 @@ +; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=openmp %s -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=OMP + +; OMP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; OMP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; OMP-NEXT: @__dummy.llvm_offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries", align 8 +; OMP-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.llvm_offload_entries], section "llvm.metadata" +; OMP-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"{{.*}}", section ".llvm.offloading", align 8 +; OMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr @.omp_offloading.device_image, ptr getelementptr ([[[SIZE]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 [[SIZE]]), ptr @__start_llvm_offload_entries, ptr @__stop_llvm_offload_entries }] +; OMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_llvm_offload_entries, ptr @__stop_llvm_offload_entries } +; OMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }] + +; OMP: define internal 
void @.omp_offloading.descriptor_reg() section ".text.startup" { +; OMP-NEXT: entry: +; OMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor) +; OMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg) +; OMP-NEXT: ret void +; OMP-NEXT: } + +; OMP: define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" { +; OMP-NEXT: entry: +; OMP-NEXT: call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor) +; OMP-NEXT: ret void +; OMP-NEXT: } + +; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=hip %s -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP + +; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; HIP-NEXT: @__dummy.llvm_offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries", align 8 +; HIP-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.llvm_offload_entries], section "llvm.metadata" +; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin" +; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 +; HIP-NEXT: @.hip.binary_handle = internal global ptr null +; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] + +; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { +; HIP-NEXT: entry: +; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) +; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 +; HIP-NEXT: call void @.hip.globals_reg(ptr %0) +; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg) +; HIP-NEXT: ret void +; HIP-NEXT: } + +; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" { +; HIP-NEXT: entry: +; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 +; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0) +; HIP-NEXT: ret void +; HIP-NEXT: } + +; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA + +; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; CUDA-NEXT: @__dummy.llvm_offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries", align 8 +; CUDA-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.llvm_offload_entries], section "llvm.metadata" +; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin" +; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 +; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null +; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }] + +; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" { +; CUDA-NEXT: entry: +; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper) +; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8 +; CUDA-NEXT: call void @.cuda.globals_reg(ptr 
%0) +; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0) +; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg) +; CUDA-NEXT: ret void +; CUDA-NEXT: } + +; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" { +; CUDA-NEXT: entry: +; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8 +; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0) +; CUDA-NEXT: ret void +; CUDA-NEXT: } diff --git a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp index d65b402571ae8..cda59b6f49b62 100644 --- a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp +++ b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp @@ -64,7 +64,7 @@ static Error wrapImages(ArrayRef> BuffersToWrap) { LLVMContext Context; Module M("offload.wrapper.module", Context); - M.setTargetTriple(Triple()); + M.setTargetTriple(llvm::Triple(TheTriple)); switch (Kind) { case llvm::object::OFK_OpenMP: From a863fd879049b74a32f7657b1b8f2b1faca447ff Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 11 Nov 2025 17:45:37 -0800 Subject: [PATCH 14/32] [NFC] Generalize the arithmetic type for `getDisjunctionWeights` (#167593) --- llvm/include/llvm/IR/ProfDataUtils.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index a7bcbf010d1bf..f1c2f38c74afd 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -18,6 +18,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Compiler.h" +#include +#include namespace llvm { struct MDProfLabels { @@ -216,9 +218,13 @@ LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); /// branch weights B1 and B2, respectively. In both B1 and B2, the first /// position (index 0) is for the 'true' branch, and the second position (index /// 1) is for the 'false' branch. +template && std::is_arithmetic_v && + sizeof(T1) <= sizeof(uint64_t) && sizeof(T2) <= sizeof(uint64_t)>> inline SmallVector -getDisjunctionWeights(const SmallVector &B1, - const SmallVector &B2) { +getDisjunctionWeights(const SmallVector &B1, + const SmallVector &B2) { // For the first conditional branch, the probability the "true" case is taken // is p(b1) = B1[0] / (B1[0] + B1[1]). The "false" case's probability is // p(not b1) = B1[1] / (B1[0] + B1[1]). @@ -235,8 +241,8 @@ getDisjunctionWeights(const SmallVector &B1, // the product of sums, the subtracted one cancels out). 
assert(B1.size() == 2); assert(B2.size() == 2); - auto FalseWeight = B1[1] * B2[1]; - auto TrueWeight = B1[0] * B2[0] + B1[0] * B2[1] + B1[1] * B2[0]; + uint64_t FalseWeight = B1[1] * B2[1]; + uint64_t TrueWeight = B1[0] * (B2[0] + B2[1]) + B1[1] * B2[0]; return {TrueWeight, FalseWeight}; } } // namespace llvm From 75e38aa6496e655d692de86fb549925d6476e11b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:01:15 -0800 Subject: [PATCH 15/32] RuntimeLibcalls: Add libcall entries for sleef and armpl modf functions (#166985) --- llvm/include/llvm/IR/RuntimeLibcalls.td | 11 ++++ llvm/lib/IR/RuntimeLibcalls.cpp | 56 ++++++++++++++++++- .../Util/DeclareRuntimeLibcalls/armpl.ll | 8 +++ .../Util/DeclareRuntimeLibcalls/sleef.ll | 10 +++- 4 files changed, 82 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index ee80606ed0dbf..ce7e836f66446 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -183,6 +183,7 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in { } foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in { + def MODF_#VecTy : RuntimeLibcall; def SINCOS_#VecTy : RuntimeLibcall; def SINCOSPI_#VecTy : RuntimeLibcall; } @@ -1093,6 +1094,11 @@ def __security_check_cookie_arm64ec : RuntimeLibcallImpl SleefLibcalls = { + def _ZGVnN2vl8_modf : RuntimeLibcallImpl; + def _ZGVnN4vl4_modff : RuntimeLibcallImpl; + def _ZGVsNxvl8_modf : RuntimeLibcallImpl; + def _ZGVsNxvl4_modff : RuntimeLibcallImpl; + def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl; def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl; def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl; @@ -1109,6 +1115,11 @@ defset list SleefLibcalls = { //===----------------------------------------------------------------------===// defset list ARMPLLibcalls = { + def armpl_vmodfq_f64 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_vmodfq_f32 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_svmodf_f64_x : RuntimeLibcallImpl; + def armpl_svmodf_f32_x : RuntimeLibcallImpl; + def armpl_vsincosq_f64 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall def armpl_vsincosq_f32 diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index e66b9adb43ac4..ee23b58742b64 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -43,7 +43,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, switch (ClVectorLibrary) { case VectorLibrary::SLEEFGNUABI: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, + {RTLIB::impl__ZGVnN2vl8_modf, RTLIB::impl__ZGVnN4vl4_modff, + RTLIB::impl__ZGVsNxvl8_modf, RTLIB::impl__ZGVsNxvl4_modff, + RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf, RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, RTLIB::impl__ZGVsNxvl4l4_sincospif, @@ -52,7 +54,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, break; case VectorLibrary::ArmPL: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, + {RTLIB::impl_armpl_vmodfq_f64, RTLIB::impl_armpl_vmodfq_f32, + RTLIB::impl_armpl_svmodf_f64_x, RTLIB::impl_armpl_svmodf_f32_x, + RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x, RTLIB::impl_armpl_vsincospiq_f32, 
RTLIB::impl_armpl_vsincospiq_f64, RTLIB::impl_armpl_svsincospi_f32_x, @@ -197,6 +201,52 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, fcNegNormal)); return {FuncTy, Attrs}; } + case RTLIB::impl__ZGVnN2vl8_modf: + case RTLIB::impl__ZGVnN4vl4_modff: + case RTLIB::impl__ZGVsNxvl8_modf: + case RTLIB::impl__ZGVsNxvl4_modff: + case RTLIB::impl_armpl_vmodfq_f64: + case RTLIB::impl_armpl_vmodfq_f32: + case RTLIB::impl_armpl_svmodf_f64_x: + case RTLIB::impl_armpl_svmodf_f32_x: { + AttrBuilder FuncAttrBuilder(Ctx); + + bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4_modff || + LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff || + LibcallImpl == RTLIB::impl_armpl_vmodfq_f32 || + LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x; + + bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl8_modf || + LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff || + LibcallImpl == RTLIB::impl_armpl_svmodf_f64_x || + LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x; + + Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + unsigned EC = IsF32 ? 4 : 2; + VectorType *VecTy = VectorType::get(ScalarTy, EC, IsScalable); + + for (Attribute::AttrKind Attr : CommonFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod)); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + { + AttrBuilder ArgAttrBuilder(Ctx); + for (Attribute::AttrKind AK : CommonPtrArgAttrs) + ArgAttrBuilder.addAttribute(AK); + ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy)); + Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder); + } + + PointerType *PtrTy = PointerType::get(Ctx, 0); + SmallVector ArgTys = {VecTy, PtrTy}; + if (hasVectorMaskArgument(LibcallImpl)) + ArgTys.push_back(VectorType::get(Type::getInt1Ty(Ctx), EC, IsScalable)); + + return {FunctionType::get(VecTy, ArgTys, false), Attrs}; + } case RTLIB::impl__ZGVnN2vl8l8_sincos: case RTLIB::impl__ZGVnN4vl4l4_sincosf: case RTLIB::impl__ZGVsNxvl8l8_sincos: @@ -271,6 +321,8 @@ bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) { /// FIXME: This should be generated by tablegen and support the argument at an /// arbitrary position switch (Impl) { + case RTLIB::impl_armpl_svmodf_f64_x: + case RTLIB::impl_armpl_svmodf_f32_x: case RTLIB::impl_armpl_svsincos_f32_x: case RTLIB::impl_armpl_svsincos_f64_x: case RTLIB::impl_armpl_svsincospi_f32_x: diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll index 1d9cf6a5d77fe..e79e89c95c14a 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -1,6 +1,10 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s +; CHECK: declare @armpl_svmodf_f32_x(, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] + +; CHECK: declare @armpl_svmodf_f64_x(, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] + ; CHECK: declare void @armpl_svsincos_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] ; CHECK: declare void @armpl_svsincos_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] @@ -9,6 +13,10 @@ ; CHECK: declare void @armpl_svsincospi_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 
16, ) [[ATTRS]] +; CHECK: declare <4 x float> @armpl_vmodfq_f32(<4 x float>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare <2 x double> @armpl_vmodfq_f64(<2 x double>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll index 2c6900761b1c0..ef2481111087f 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -1,18 +1,26 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare <2 x double> @_ZGVnN2vl8_modf(<2 x double>, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare <4 x float> @_ZGVnN4vl4_modff(<4 x float>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVnN4vl4l4_sincosf(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare @_ZGVsNxvl4_modff(, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVsNxvl4l4_sincosf(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVsNxvl4l4_sincospif(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare @_ZGVsNxvl8_modf(, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVsNxvl8l8_sincos(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] From 4b9771e41a2a53a0398f53bc90d8d0587f03e162 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:05:35 -0800 Subject: [PATCH 16/32] DAG: Use modf vector libcalls through RuntimeLibcalls (#166986) Copy new process from sincos/sincospi --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 41 ++----------------- .../SelectionDAG/LegalizeVectorOps.cpp | 5 ++- llvm/lib/CodeGen/TargetLoweringBase.cpp | 18 ++++++++ 3 files changed, 24 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a52ad41d0f1b3..944e1714e8f98 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -313,33 +313,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Type 
*Ty = getContainedTypes(RetTy).front(); EVT VT = getTLI()->getValueType(DL, Ty); - EVT ScalarVT = VT.getScalarType(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - /// Migration flag. IsVectorCall cases directly know about the vector - /// libcall in RuntimeLibcallsInfo and shouldn't try to use - /// LibInfo->getVectorMappingInfo. - bool IsVectorCall = false; - switch (ICA.getID()) { case Intrinsic::modf: - LC = RTLIB::getMODF(ScalarVT); + LC = RTLIB::getMODF(VT); break; case Intrinsic::sincospi: LC = RTLIB::getSINCOSPI(VT); - if (LC == RTLIB::UNKNOWN_LIBCALL) - LC = RTLIB::getSINCOSPI(ScalarVT); - else if (VT.isVector()) - IsVectorCall = true; - break; case Intrinsic::sincos: LC = RTLIB::getSINCOS(VT); - if (LC == RTLIB::UNKNOWN_LIBCALL) - LC = RTLIB::getSINCOS(ScalarVT); - else if (VT.isVector()) - IsVectorCall = true; - break; default: return std::nullopt; @@ -350,33 +334,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (LibcallImpl == RTLIB::Unsupported) return std::nullopt; - StringRef LCName = - RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl); - - // Search for a corresponding vector variant. - // - // FIXME: Should use RuntimeLibcallsInfo, not TargetLibraryInfo to get the - // vector mapping. LLVMContext &Ctx = RetTy->getContext(); - ElementCount VF = getVectorizedTypeVF(RetTy); - VecDesc const *VD = nullptr; - - if (!IsVectorCall) { - for (bool Masked : {false, true}) { - if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked))) - break; - } - if (!VD) - return std::nullopt; - } // Cost the call + mask. auto Cost = thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind); - if ((VD && VD->isMasked()) || - (IsVectorCall && - RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl))) { + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + ElementCount VF = getVectorizedTypeVF(RetTy); auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF); Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, VecTy, {}, CostKind, 0, nullptr, {}); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 78d8ea0676dd7..a7ae794459331 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1283,9 +1283,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { break; } case ISD::FMODF: { - EVT VT = Node->getValueType(0).getVectorElementType(); + EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, + if (LC != RTLIB::UNKNOWN_LIBCALL && + DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 36a424f1c8b63..77d9b156e2672 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -475,6 +475,24 @@ RTLIB::Libcall RTLIB::getSINCOS_STRET(EVT RetVT) { } RTLIB::Libcall RTLIB::getMODF(EVT RetVT) { + // TODO: Tablegen should generate this function + if (RetVT.isVector()) { + if (!RetVT.isSimple()) + return RTLIB::UNKNOWN_LIBCALL; + switch (RetVT.getSimpleVT().SimpleTy) { + case MVT::v4f32: + return RTLIB::MODF_V4F32; + case MVT::v2f64: + return RTLIB::MODF_V2F64; + case MVT::nxv4f32: + return RTLIB::MODF_NXV4F32; + case MVT::nxv2f64: + return RTLIB::MODF_NXV2F64; + default: + return 
RTLIB::UNKNOWN_LIBCALL; + } + } + return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128, MODF_PPCF128); } From c1f18a2518898e5f593ce1341d19f61b5ee58cdd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:26:24 -0800 Subject: [PATCH 17/32] InferAddressSpaces: Add more baseline tests for assume handling (#167611) --- .../AMDGPU/builtin-assumed-addrspace.ll | 208 +++++++++++++++++- 1 file changed, 200 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll index e0c80c0389541..32dca860a7ded 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces -o - %s | FileCheck %s -define float @f0(ptr %p) { -; CHECK-LABEL: define float @f0( +define float @assume_is_shared_gep(ptr %p) { +; CHECK-LABEL: define float @assume_is_shared_gep( ; CHECK-SAME: ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) @@ -24,8 +24,8 @@ entry: ret float %load } -define float @f1(ptr %p) { -; CHECK-LABEL: define float @f1( +define float @assume_is_private_gep(ptr %p) { +; CHECK-LABEL: define float @assume_is_private_gep( ; CHECK-SAME: ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) @@ -47,8 +47,8 @@ entry: ret float %load } -define float @f2(ptr %p) { -; CHECK-LABEL: define float @f2( +define float @assume_not_private_and_not_shared_gep(ptr %p) { +; CHECK-LABEL: define float @assume_not_private_and_not_shared_gep( ; CHECK-SAME: ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) @@ -78,8 +78,8 @@ entry: ret float %load } -define float @g0(i32 %c, ptr %p) { -; CHECK-LABEL: define float @g0( +define float @conditionally_assume_is_shared_gep(i32 %c, ptr %p) { +; CHECK-LABEL: define float @conditionally_assume_is_shared_gep( ; CHECK-SAME: i32 [[C:%.*]], ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[C]], 0 @@ -127,6 +127,198 @@ if.end: ret float %add2 } +define float @conditionally_assume_is_shared_else_assume_private(i32 %c, ptr %p) { +; CHECK-LABEL: define float @conditionally_assume_is_shared_else_assume_private( +; CHECK-SAME: i32 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_THEN_SHARED:.*]], label %[[IF_THEN_PRIVATE:.*]] +; CHECK: [[IF_THEN_SHARED]]: +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: [[WORKITEM_ID_X_0:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[WORKITEM_ID_X_0]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(3) +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP0]], i64 [[IDXPROM]] +; CHECK-NEXT: [[LOAD0:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX0]], align 4 +; CHECK-NEXT: [[ADD0:%.*]] = fadd float [[LOAD0]], 4.000000e+00 +; CHECK-NEXT: br label 
%[[IF_END:.*]] +; CHECK: [[IF_THEN_PRIVATE]]: +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_PRIVATE]]) +; CHECK-NEXT: [[WORKITEM_ID_X_1:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[WORKITEM_ID_X_1]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(5) +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr addrspace(5) [[TMP1]], i64 [[IDXPROM1]] +; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr addrspace(5) [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOAD1]], 4.000000e+00 +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[ADD0]], %[[IF_THEN_SHARED]] ], [ [[ADD1]], %[[IF_THEN_PRIVATE]] ] +; CHECK-NEXT: ret float [[PHI]] +; +entry: + %tobool.not = icmp eq i32 %c, 0 + br i1 %tobool.not, label %if.then.shared, label %if.then.private + +if.then.shared: + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p) + tail call void @llvm.assume(i1 %is.shared) + %workitem.id.x.0 = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom = zext i32 %workitem.id.x.0 to i64 + %arrayidx0 = getelementptr inbounds float, ptr %p, i64 %idxprom + %load0 = load float, ptr %arrayidx0, align 4 + %add0 = fadd float %load0, 4.0 + br label %if.end + +if.then.private: + %is.private = call i1 @llvm.amdgcn.is.private(ptr %p) + tail call void @llvm.assume(i1 %is.private) + %workitem.id.x.1 = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom1 = zext i32 %workitem.id.x.1 to i64 + %arrayidx1 = getelementptr inbounds float, ptr %p, i64 %idxprom1 + %load1 = load float, ptr %arrayidx1, align 4 + %add1 = fadd float %load1, 4.0 + br label %if.end + +if.end: + %phi = phi float [ %add0, %if.then.shared ], [ %add1, %if.then.private ] + ret float %phi +} + +define float @assume_func_arg_is_shared_load(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_shared_load( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[FLAT_PTR]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %flat.ptr) + tail call void @llvm.assume(i1 %is.shared) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define float @assume_func_arg_is_private_load(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_private_load( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_PRIVATE]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + tail call void @llvm.assume(i1 %is.private) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define float @assume_func_arg_is_not_shared_not_private(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_not_shared_not_private( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_PRIVATE:%.*]] = xor i1 [[IS_PRIVATE]], true +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_SHARED:%.*]] = xor i1 [[IS_SHARED]], true +; 
CHECK-NEXT: [[NOT_PRIVATE_AND_NOT_SHARED:%.*]] = and i1 [[NOT_PRIVATE]], [[NOT_SHARED]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[NOT_PRIVATE_AND_NOT_SHARED]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + %not.private = xor i1 %is.private, true + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %flat.ptr) + %not.shared = xor i1 %is.shared, true + %not.private.and.not.shared = and i1 %not.private, %not.shared + tail call void @llvm.assume(i1 %not.private.and.not.shared) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define float @assume_func_arg_is_not_private_load(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_not_private_load( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_IS_PRIVATE:%.*]] = xor i1 [[IS_PRIVATE]], true +; CHECK-NEXT: tail call void @llvm.assume(i1 [[NOT_IS_PRIVATE]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + %not.is.private = xor i1 %is.private, true + tail call void @llvm.assume(i1 %not.is.private) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define i64 @assume_func_arg_is_not_private_atomicrmw(ptr %flat.ptr, i64 %val) { +; CHECK-LABEL: define i64 @assume_func_arg_is_not_private_atomicrmw( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]], i64 [[VAL:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_IS_PRIVATE:%.*]] = xor i1 [[IS_PRIVATE]], true +; CHECK-NEXT: tail call void @llvm.assume(i1 [[NOT_IS_PRIVATE]]) +; CHECK-NEXT: [[RMW:%.*]] = atomicrmw sub ptr [[FLAT_PTR]], i64 [[VAL]] seq_cst, align 4 +; CHECK-NEXT: ret i64 [[RMW]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + %not.is.private = xor i1 %is.private, true + tail call void @llvm.assume(i1 %not.is.private) + %rmw = atomicrmw sub ptr %flat.ptr, i64 %val seq_cst, align 4 + ret i64 %rmw +} + +define float @contradictory_assume_after_gep_same_block(ptr %p) { +; CHECK-LABEL: define float @contradictory_assume_after_gep_same_block( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: [[WORKITEM_ID_X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[WORKITEM_ID_X]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(3) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[IDXPROM]] +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP2]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 false) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) [[GEP]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p) + tail call void @llvm.assume(i1 %is.shared) + %workitem.id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom = zext i32 %workitem.id.x to i64 + %gep = getelementptr inbounds float, ptr %p, i64 %idxprom + %is.private = call i1 @llvm.amdgcn.is.private(ptr %gep) + tail call void @llvm.assume(i1 %is.private) + %load = load float, ptr 
%gep, align 4 + ret float %load +} + +define float @contradictory_assume_argument_same_block(ptr %p) { +; CHECK-LABEL: define float @contradictory_assume_argument_same_block( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_PRIVATE]]) +; CHECK-NEXT: [[WORKITEM_ID_X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[WORKITEM_ID_X]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(3) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i64 [[IDXPROM]] +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) [[GEP]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p) + %is.private = call i1 @llvm.amdgcn.is.private(ptr %p) + tail call void @llvm.assume(i1 %is.shared) + tail call void @llvm.assume(i1 %is.private) + %workitem.id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom = zext i32 %workitem.id.x to i64 + %gep = getelementptr inbounds float, ptr %p, i64 %idxprom + %load = load float, ptr %gep, align 4 + ret float %load +} + declare void @llvm.assume(i1) declare i1 @llvm.amdgcn.is.shared(ptr nocapture) declare i1 @llvm.amdgcn.is.private(ptr nocapture) From a100a6c97be0259c8d0b5758830cba14d4eb5c4d Mon Sep 17 00:00:00 2001 From: Timur Baydyusenov Date: Wed, 12 Nov 2025 05:29:40 +0300 Subject: [PATCH 18/32] [llvm][llvm-dis] Fix 'llvm-dis' with '--materialize-metadata --show-annotations' crashes (#167487) Added handling the case of a non-materialized module, also don't call printInfoComment for immaterializable values --- llvm/lib/IR/AsmWriter.cpp | 13 ++++++------ llvm/test/Assembler/metadata-annotations.ll | 22 +++++++++++++++++---- llvm/tools/llvm-dis/llvm-dis.cpp | 13 ++++++++++++ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 0c8565c927a24..4d4ffe93a8067 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2931,7 +2931,7 @@ class AssemblyWriter { // printInfoComment - Print a little comment after the instruction indicating // which slot it occupies. - void printInfoComment(const Value &V); + void printInfoComment(const Value &V, bool isMaterializable = false); // printGCRelocateComment - print comment after call to the gc.relocate // intrinsic indicating base and derived pointer names. @@ -3963,7 +3963,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (Attrs.hasAttributes()) Out << " #" << Machine.getAttributeGroupSlot(Attrs); - printInfoComment(*GV); + printInfoComment(*GV, GV->isMaterializable()); } void AssemblyWriter::printAlias(const GlobalAlias *GA) { @@ -4001,7 +4001,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { Out << '"'; } - printInfoComment(*GA); + printInfoComment(*GA, GA->isMaterializable()); Out << '\n'; } @@ -4040,7 +4040,7 @@ void AssemblyWriter::printIFunc(const GlobalIFunc *GI) { printMetadataAttachments(MDs, ", "); } - printInfoComment(*GI); + printInfoComment(*GI, GI->isMaterializable()); Out << '\n'; } @@ -4319,13 +4319,12 @@ void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) { /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. 
-void AssemblyWriter::printInfoComment(const Value &V) { +void AssemblyWriter::printInfoComment(const Value &V, bool isMaterializable) { if (const auto *Relocate = dyn_cast(&V)) printGCRelocateComment(*Relocate); - if (AnnotationWriter) { + if (AnnotationWriter && !isMaterializable) AnnotationWriter->printInfoComment(V, Out); - } if (PrintInstDebugLocs) { if (auto *I = dyn_cast(&V)) { diff --git a/llvm/test/Assembler/metadata-annotations.ll b/llvm/test/Assembler/metadata-annotations.ll index 4fd471338cd0a..2a08a17849dbd 100644 --- a/llvm/test/Assembler/metadata-annotations.ll +++ b/llvm/test/Assembler/metadata-annotations.ll @@ -1,9 +1,23 @@ ; RUN: llvm-as < %s | llvm-dis --materialize-metadata --show-annotations | FileCheck %s +; CHECK: @global_var = global i32 1 +; CHECK: @alias = alias i32, ptr @global_var +; CHECK: @ifunc = ifunc i32 (), ptr @ifunc_resolver +@global_var = global i32 1 +@alias = alias i32, ptr @global_var +@ifunc = ifunc i32 (), ptr @ifunc_resolver + +; CHECK: ; Materializable +; CHECK-NEXT: define ptr @ifunc_resolver() {} +define ptr @ifunc_resolver() { + ret ptr @defined_function +} + ; CHECK: ; Materializable -; CHECK-NEXT: define dso_local i32 @test() {} -define dso_local i32 @test() { -entry: - ret i32 0 +; CHECK-NEXT: define void @defined_function() {} +define void @defined_function() { + ret void } +; CHECK: declare void @declared_function() +declare void @declared_function() diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index 35c540963a487..90ae3ef077ae9 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -101,13 +101,26 @@ static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { } } class CommentWriter : public AssemblyAnnotationWriter { +private: + bool canSafelyAccessUses(const Value &V) { + // Can't safely access uses, if module not materialized. + const GlobalValue *GV = dyn_cast(&V); + return !GV || (GV->getParent() && GV->getParent()->isMaterialized()); + } + public: void emitFunctionAnnot(const Function *F, formatted_raw_ostream &OS) override { + if (!canSafelyAccessUses(*F)) + return; + OS << "; [#uses=" << F->getNumUses() << ']'; // Output # uses OS << '\n'; } void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { + if (!canSafelyAccessUses(V)) + return; + bool Padded = false; if (!V.getType()->isVoidTy()) { OS.PadToColumn(50); From 3e6442a516deccc9b734a8bfc5904e726481cb36 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 11 Nov 2025 18:31:43 -0800 Subject: [PATCH 19/32] [WebAssembly] Use MCRegister::id(). 
NFC (#167609) --- .../WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp | 4 ++-- .../WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index d8bfed9dc0390..651f631c1ee55 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -317,8 +317,8 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - unsigned WAReg = Op.getReg(); - if (int(WAReg) >= 0) + MCRegister WAReg = Op.getReg(); + if (int(WAReg.id()) >= 0) printRegName(O, WAReg); else if (OpNo >= Desc.getNumDefs() && !IsVariadicDef) O << "$pop" << WebAssembly::getWARegStackId(WAReg); diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index fe9a4bada2430..5dc0e3aa91622 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -608,9 +608,9 @@ inline bool isLocalTee(unsigned Opc) { static const unsigned UnusedReg = -1u; // For a given stackified WAReg, return the id number to print with push/pop. -unsigned inline getWARegStackId(unsigned Reg) { - assert(Reg & INT32_MIN); - return Reg & INT32_MAX; +unsigned inline getWARegStackId(MCRegister Reg) { + assert(Reg.id() & INT32_MIN); + return Reg.id() & INT32_MAX; } } // end namespace WebAssembly From cf35502dd569d842a860696650a783f896db0648 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Tue, 11 Nov 2025 18:31:57 -0800 Subject: [PATCH 20/32] [libunwind] Fix execution flow imbalance when using C++ Exceptions (#165066) --- libunwind/src/Registers.hpp | 16 +- libunwind/src/UnwindCursor.hpp | 66 ++++++- libunwind/src/UnwindLevel1.c | 24 ++- libunwind/src/UnwindRegistersRestore.S | 15 +- libunwind/src/assembly.h | 4 + libunwind/src/config.h | 9 + libunwind/src/libunwind.cpp | 22 ++- libunwind/src/libunwind_ext.h | 6 +- .../Python/lldbsuite/test/decorators.py | 25 +++ .../unwind/libunwind_ret_injection/Makefile | 6 + .../TestLibUnwindRetInjection.py | 177 ++++++++++++++++++ .../unwind/libunwind_ret_injection/main.cpp | 45 +++++ 12 files changed, 396 insertions(+), 19 deletions(-) create mode 100644 lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile create mode 100644 lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py create mode 100644 lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index 9d4c8344150f6..28649fafb23d5 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -1832,8 +1832,9 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) { /// Registers_arm64 holds the register state of a thread in a 64-bit arm /// process. 
class _LIBUNWIND_HIDDEN Registers_arm64; -extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); extern "C" int64_t __libunwind_Registers_arm64_za_disable(); +extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, + unsigned walkedFrames); #if defined(_LIBUNWIND_USE_GCS) extern "C" void *__libunwind_shstk_get_jump_target() { @@ -1861,10 +1862,17 @@ class _LIBUNWIND_HIDDEN Registers_arm64 { v128 getVectorRegister(int num) const; void setVectorRegister(int num, v128 value); static const char *getRegisterName(int num); - void jumpto() { - zaDisable(); - __libunwind_Registers_arm64_jumpto(this); +#ifdef _LIBUNWIND_TRACE_RET_INJECT + _LIBUNWIND_TRACE_NO_INLINE + void returnto(unsigned walkedFrames) { + __libunwind_Registers_arm64_jumpto(this, walkedFrames); + } +#else + void jumpto() { + zaDisable(); + __libunwind_Registers_arm64_jumpto(this, 0); } +#endif static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 7ec5f9e91578a..d7348254af07b 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -472,7 +472,9 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor { virtual void getInfo(unw_proc_info_t *) { _LIBUNWIND_ABORT("getInfo not implemented"); } - virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } + _LIBUNWIND_TRACE_NO_INLINE virtual void jumpto() { + _LIBUNWIND_ABORT("jumpto not implemented"); + } virtual bool isSignalFrame() { _LIBUNWIND_ABORT("isSignalFrame not implemented"); } @@ -489,6 +491,12 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor { virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); } #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + virtual void setWalkedFrames(unsigned) { + _LIBUNWIND_ABORT("setWalkedFrames not implemented"); + } +#endif + #ifdef _AIX virtual uintptr_t getDataRelBase() { _LIBUNWIND_ABORT("getDataRelBase not implemented"); @@ -965,7 +973,8 @@ class UnwindCursor : public AbstractUnwindCursor{ virtual void setFloatReg(int, unw_fpreg_t); virtual int step(bool stage2 = false); virtual void getInfo(unw_proc_info_t *); - virtual void jumpto(); + _LIBUNWIND_TRACE_NO_INLINE + virtual void jumpto(); virtual bool isSignalFrame(); virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); @@ -974,6 +983,10 @@ class UnwindCursor : public AbstractUnwindCursor{ virtual void saveVFPAsX(); #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + virtual void setWalkedFrames(unsigned); +#endif + #ifdef _AIX virtual uintptr_t getDataRelBase(); #endif @@ -1356,6 +1369,9 @@ class UnwindCursor : public AbstractUnwindCursor{ defined(_LIBUNWIND_TARGET_HAIKU) bool _isSigReturn = false; #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + uint32_t _walkedFrames; +#endif }; @@ -1410,7 +1426,46 @@ void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { } template void UnwindCursor::jumpto() { +#ifdef _LIBUNWIND_TRACE_RET_INJECT + /* + + The value of `_walkedFrames` is computed in `unwind_phase2` and represents the + number of frames walked starting `unwind_phase2` to get to the landing pad. + + ``` + // uc is initialized by __unw_getcontext in the parent frame. + // The first stack frame walked is unwind_phase2. 
+ unsigned framesWalked = 1; + ``` + + To that, we need to add the number of function calls in libunwind between + `unwind_phase2` & `__libunwind_Registers_arm64_jumpto` which performs the long + jump, to rebalance the execution flow. + + ``` + frame #0: libunwind.1.dylib`__libunwind_Registers_arm64_jumpto at UnwindRegistersRestore.S:646 + frame #1: libunwind.1.dylib`libunwind::Registers_arm64::returnto at Registers.hpp:2291:3 + frame #2: libunwind.1.dylib`libunwind::UnwindCursor::jumpto at UnwindCursor.hpp:1474:14 + frame #3: libunwind.1.dylib`__unw_resume at libunwind.cpp:375:7 + frame #4: libunwind.1.dylib`__unw_resume_with_frames_walked at libunwind.cpp:363:10 + frame #5: libunwind.1.dylib`unwind_phase2 at UnwindLevel1.c:328:9 + frame #6: libunwind.1.dylib`_Unwind_RaiseException at UnwindLevel1.c:480:10 + frame #7: libc++abi.dylib`__cxa_throw at cxa_exception.cpp:295:5 + ... + ``` + + If we look at the backtrace from `__libunwind_Registers_arm64_jumpto`, we see + there are 5 frames on the stack to reach `unwind_phase2`. However, only 4 of + them will never return, since `__libunwind_Registers_arm64_jumpto` returns + back to the landing pad, so we need to subtract 1 to the number of + `_EXTRA_LIBUNWIND_FRAMES_WALKED`. + */ + + static constexpr size_t _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1; + _registers.returnto(_walkedFrames + _EXTRA_LIBUNWIND_FRAMES_WALKED); +#else _registers.jumpto(); +#endif } #ifdef __arm__ @@ -1419,6 +1474,13 @@ template void UnwindCursor::saveVFPAsX() { } #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT +template +void UnwindCursor::setWalkedFrames(unsigned walkedFrames) { + _walkedFrames = walkedFrames; +} +#endif + #ifdef _AIX template uintptr_t UnwindCursor::getDataRelBase() { diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c index b0cd60dfb9141..79398bac8b531 100644 --- a/libunwind/src/UnwindLevel1.c +++ b/libunwind/src/UnwindLevel1.c @@ -48,16 +48,15 @@ // avoided when invoking the `jumpto()` function. To do this, we use inline // assemblies to "goto" the `jumpto()` for these architectures. 
#if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - (void)fn; \ - __unw_resume((cursor)); \ + __unw_resume_with_frames_walked((cursor), (payload)); \ } while (0) #elif defined(_LIBUNWIND_TARGET_I386) #define __shstk_step_size (4) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("push %%edi\n\t" \ @@ -67,9 +66,9 @@ } while (0) #elif defined(_LIBUNWIND_TARGET_X86_64) #define __shstk_step_size (8) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("jmpq *%%rdx\n\t" ::"D"(shstkRegContext), \ @@ -77,16 +76,17 @@ } while (0) #elif defined(_LIBUNWIND_TARGET_AARCH64) #define __shstk_step_size (8) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("mov x0, %0\n\t" \ + "mov x1, wzr\n\t" \ "br %1\n\t" \ : \ : "r"(shstkRegContext), "r"(shstkJumpAddress) \ - : "x0"); \ + : "x0", "x1"); \ } while (0) #endif @@ -205,6 +205,8 @@ extern int __unw_step_stage2(unw_cursor_t *); #if defined(_LIBUNWIND_USE_GCS) // Enable the GCS target feature to permit gcspop instructions to be used. __attribute__((target("+gcs"))) +#else +_LIBUNWIND_TRACE_NO_INLINE #endif static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, @@ -349,6 +351,8 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, #if defined(_LIBUNWIND_USE_GCS) // Enable the GCS target feature to permit gcspop instructions to be used. 
__attribute__((target("+gcs"))) +#else +_LIBUNWIND_TRACE_NO_INLINE #endif static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index fd306ed8c5230..76a80344034f7 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -645,13 +645,26 @@ Lnovec: #endif // -// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); +// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, unsigned); // // On entry: // thread_state pointer is in x0 +// walked_frames counter is in x1 // .p2align 2 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) + + #if defined(_LIBUNWIND_TRACE_RET_INJECT) + cbz w1, 1f + 0: + subs w1, w1, #1 + adr x16, #8 + ret x16 + + b.ne 0b + 1: + #endif + // skip restore of x0,x1 for now ldp x2, x3, [x0, #0x010] ldp x4, x5, [x0, #0x020] diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h index f0fcd006f2073..84c9d526f1d75 100644 --- a/libunwind/src/assembly.h +++ b/libunwind/src/assembly.h @@ -132,6 +132,10 @@ #if defined(__APPLE__) +#if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__) +#define _LIBUNWIND_TRACE_RET_INJECT 1 +#endif + #define SYMBOL_IS_FUNC(name) #define HIDDEN_SYMBOL(name) .private_extern name #if defined(_LIBUNWIND_HIDE_SYMBOLS) diff --git a/libunwind/src/config.h b/libunwind/src/config.h index deb5a4d4d73d4..f017403fa2234 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -28,6 +28,9 @@ #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif + #if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__) + #define _LIBUNWIND_TRACE_RET_INJECT 1 + #endif #elif defined(_WIN32) #ifdef __SEH__ #define _LIBUNWIND_SUPPORT_SEH_UNWIND 1 @@ -61,6 +64,12 @@ #endif #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT +#define _LIBUNWIND_TRACE_NO_INLINE __attribute__((noinline, disable_tail_calls)) +#else +#define _LIBUNWIND_TRACE_NO_INLINE +#endif + #if defined(_LIBUNWIND_HIDE_SYMBOLS) // The CMake file passes -fvisibility=hidden to control ELF/Mach-O visibility. #define _LIBUNWIND_EXPORT diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 951d87db868bc..3a94b6cf0cc5c 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -247,7 +247,27 @@ _LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor, } _LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info) -/// Resume execution at cursor position (aka longjump). +/// Rebalance the execution flow by injecting the right amount of `ret` +/// instruction relatively to the amount of `walkedFrames` then resume execution +/// at cursor position (aka longjump). +_LIBUNWIND_HIDDEN int __unw_resume_with_frames_walked(unw_cursor_t *cursor, + unsigned walkedFrames) { + _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p, walkedFrames=%u)", + static_cast(cursor), walkedFrames); +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) + // Inform the ASan runtime that now might be a good time to clean stuff up. + __asan_handle_no_return(); +#endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->setWalkedFrames(walkedFrames); +#endif + return __unw_resume(cursor); +} +_LIBUNWIND_WEAK_ALIAS(__unw_resume_with_frames_walked, + unw_resume_with_frames_walked) + +/// Legacy function. Resume execution at cursor position (aka longjump). 
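+/// Callers that rely on the `ret`-injection rebalancing should call
+/// `__unw_resume_with_frames_walked` instead; this legacy entry point resumes
+/// without recording a walked-frame count on the cursor.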
_LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast(cursor)); #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h index 28db43a4f6eef..900e8101f81f1 100644 --- a/libunwind/src/libunwind_ext.h +++ b/libunwind/src/libunwind_ext.h @@ -30,7 +30,11 @@ extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *); extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *); extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t); extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t); -extern int __unw_resume(unw_cursor_t *); +_LIBUNWIND_TRACE_NO_INLINE + extern int __unw_resume_with_frames_walked(unw_cursor_t *, unsigned); +// `__unw_resume` is a legacy function. Use `__unw_resume_with_frames_walked` instead. +_LIBUNWIND_TRACE_NO_INLINE + extern int __unw_resume(unw_cursor_t *); #ifdef __arm__ /* Save VFP registers in FSTMX format (instead of FSTMD). */ diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 454196e1b0264..23d2165e07f7e 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -647,6 +647,31 @@ def is_out_of_tree_debugserver(): return skipTestIfFn(is_out_of_tree_debugserver)(func) +def skipIfOutOfTreeLibunwind(func): + """Decorate the item to skip tests if libunwind was not built in-tree.""" + + def is_out_of_tree_libunwind(): + if not configuration.llvm_tools_dir: + return "out-of-tree libunwind" + + # llvm_tools_dir is typically /bin, so lib is a sibling. + llvm_lib_dir = os.path.join( + os.path.dirname(configuration.llvm_tools_dir), "lib" + ) + + if not os.path.isdir(llvm_lib_dir): + return "out-of-tree libunwind" + + # Check for libunwind library (any extension). + for filename in os.listdir(llvm_lib_dir): + if filename.startswith("libunwind.") or filename.startswith("unwind."): + return None + + return "out-of-tree libunwind" + + return skipTestIfFn(is_out_of_tree_libunwind)(func) + + def skipIfRemote(func): """Decorate the item to skip tests if testing remotely.""" return unittest.skipIf(lldb.remote_platform, "skip on remote platform")(func) diff --git a/lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile new file mode 100644 index 0000000000000..4698eaa815b83 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp + +# Build with C++ exceptions enabled +CXXFLAGS := -g -O0 -fexceptions + +include Makefile.rules diff --git a/lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py new file mode 100644 index 0000000000000..e03234d1b5077 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py @@ -0,0 +1,177 @@ +""" +Test that libunwind correctly injects 'ret' instructions to rebalance execution flow +when unwinding C++ exceptions. This is important for Apple Processor Trace analysis. 
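+
+The test breaks in __libunwind_Registers_arm64_jumpto (loaded from the
+just-built libunwind via DYLD_INSERT_LIBRARIES), reads the walked-frames count
+from x1, then single-steps the ret-injection loop and checks that exactly that
+many 'ret' instructions execute before register restoration (the first 'ldp')
+begins.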
+""" + +import lldb +import os +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +from lldbsuite.test import configuration + + +class LibunwindRetInjectionTestCase(TestBase): + @skipIf(archs=no_match(["arm64", "arm64e", "aarch64"])) + @skipUnlessDarwin + @skipIfOutOfTreeLibunwind + def test_ret_injection_on_exception_unwind(self): + """Test that __libunwind_Registers_arm64_jumpto receives correct walkedFrames count and injects the right number of ret instructions.""" + self.build() + + exe = self.getBuildArtifact("a.out") + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, VALID_TARGET) + + # Find the just-built libunwind, not the system one. + # llvm_tools_dir is typically /bin, so lib is a sibling. + self.assertIsNotNone( + configuration.llvm_tools_dir, + "llvm_tools_dir must be set to find in-tree libunwind", + ) + + llvm_lib_dir = os.path.join( + os.path.dirname(configuration.llvm_tools_dir), "lib" + ) + + # Find the libunwind library (platform-agnostic). + libunwind_path = None + for filename in os.listdir(llvm_lib_dir): + if filename.startswith("libunwind.") or filename.startswith("unwind."): + libunwind_path = os.path.join(llvm_lib_dir, filename) + break + + self.assertIsNotNone( + libunwind_path, f"Could not find libunwind in {llvm_lib_dir}" + ) + + # Set breakpoint in __libunwind_Registers_arm64_jumpto. + # This is the function that performs the actual jump and ret injection. + bp = target.BreakpointCreateByName("__libunwind_Registers_arm64_jumpto") + self.assertTrue(bp.IsValid()) + self.assertGreater(bp.GetNumLocations(), 0) + + # Set up DYLD_INSERT_LIBRARIES to use the just-built libunwind. + launch_info = lldb.SBLaunchInfo(None) + env = target.GetEnvironment() + env.Set("DYLD_INSERT_LIBRARIES", libunwind_path, True) + launch_info.SetEnvironment(env, False) + + # Launch the process with our custom libunwind. + error = lldb.SBError() + process = target.Launch(launch_info, error) + self.assertSuccess( + error, f"Failed to launch process with libunwind at {libunwind_path}" + ) + self.assertTrue(process, PROCESS_IS_VALID) + + # We should hit the breakpoint in __libunwind_Registers_arm64_jumpto + # during the exception unwinding phase 2. + threads = lldbutil.get_threads_stopped_at_breakpoint(process, bp) + self.assertEqual(len(threads), 1, "Should have stopped at breakpoint") + + thread = threads[0] + frame = thread.GetFrameAtIndex(0) + + # Verify we're in __libunwind_Registers_arm64_jumpto. + function_name = frame.GetFunctionName() + self.assertTrue( + "__libunwind_Registers_arm64_jumpto" in function_name, + f"Expected to be in __libunwind_Registers_arm64_jumpto, got {function_name}", + ) + + # On ARM64, the walkedFrames parameter should be in register x1 (second parameter). + # According to the ARM64 calling convention, integer arguments are passed in x0-x7. + # x0 = Registers_arm64* pointer. + # x1 = unsigned walkedFrames. + error = lldb.SBError() + x1_value = frame.register["x1"].GetValueAsUnsigned(error) + self.assertSuccess(error, "Failed to read x1 register") + + # According to the code in UnwindCursor.hpp, the walkedFrames value represents: + # 1. The number of frames walked in unwind_phase2 to reach the landing pad. + # 2. Plus _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1 = 4 additional libunwind frames. 
+ # + # From the comment in the code: + # frame #0: __libunwind_Registers_arm64_jumpto + # frame #1: Registers_arm64::returnto + # frame #2: UnwindCursor::jumpto + # frame #3: __unw_resume + # frame #4: __unw_resume_with_frames_walked + # frame #5: unwind_phase2 + # + # Since __libunwind_Registers_arm64_jumpto returns to the landing pad, + # we subtract 1, so _EXTRA_LIBUNWIND_FRAMES_WALKED = 4. + # + # For our test program: + # - unwind_phase2 starts walking (frame 0 counted here). + # - Walks through: func_d (throw site), func_c, func_b, func_a. + # - Finds landing pad in main. + # That's approximately 4-5 frames from the user code. + # Plus the 4 extra libunwind frames. + # + # So we expect x1 to be roughly 8-10. + expected_min_frames = 8 + expected_max_frames = 13 # Allow some variation for libc++abi frames. + + self.assertGreaterEqual( + x1_value, + expected_min_frames, + f"walkedFrames (x1) should be >= {expected_min_frames}, got {x1_value}. " + "This is the number of 'ret' instructions that will be executed.", + ) + + self.assertLessEqual( + x1_value, + expected_max_frames, + f"walkedFrames (x1) should be <= {expected_max_frames}, got {x1_value}. " + "Value seems too high.", + ) + + # Now step through the ret injection loop and count the actual number of 'ret' executions. + # The loop injects exactly x1_value ret instructions before continuing with register restoration. + # We step until we hit the first 'ldp' instruction (register restoration starts with 'ldp x2, x3, [x0, #0x010]'). + ret_executed_count = 0 + max_steps = 100 # Safety limit to prevent infinite loops. + + for step_count in range(max_steps): + # Get current instruction. + pc = frame.GetPC() + inst = process.ReadMemory(pc, 4, lldb.SBError()) + + # Disassemble current instruction. + current_inst = target.GetInstructions(lldb.SBAddress(pc, target), inst)[0] + mnemonic = current_inst.GetMnemonic(target) + operands = current_inst.GetOperands(target) + + # Check if we've reached the register restoration part (first ldp after the loop). + if mnemonic == "ldp": + # We've exited the ret injection loop. + break + + # Count 'ret' instructions that get executed. + if mnemonic == "ret": + self.assertEqual(operands, "x16") + ret_executed_count += 1 + + # Step one instruction. + thread.StepInstruction(False) # False = step over. + + # Update frame reference. + frame = thread.GetFrameAtIndex(0) + + # Verify we didn't hit the safety limit. + self.assertLess( + step_count, + max_steps - 1, + f"Stepped {max_steps} times without reaching 'ldp' instruction. Something is wrong.", + ) + + # The number of executed 'ret' instructions should match x1_value. + # According to the implementation, the loop executes exactly x1_value times. + self.assertEqual( + ret_executed_count, + x1_value, + f"Expected {x1_value} 'ret' instructions to be executed (matching x1 register), " + f"but counted {ret_executed_count} executed 'ret' instructions.", + ) diff --git a/lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp new file mode 100644 index 0000000000000..00685e4d6b137 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp @@ -0,0 +1,45 @@ +// Test program to verify libunwind ret injection feature for execution flow +// rebalancing. +// +// This test creates a multi-frame call stack and throws a C++ exception to +// trigger libunwind's two-phase exception handling. 
The test verifies that +// libunwind correctly injects the right amount of 'ret' instructions to +// rebalance the execution flow when returning to the landing pad, which is +// important for Apple Processor Trace analysis. + +#include +#include +#include + +// Marker functions with noinline to ensure they appear in the stack. +static void __attribute__((noinline)) func_d() { + printf("In func_d, about to throw exception\n"); + throw std::runtime_error("test exception"); +} + +static void __attribute__((noinline)) func_c() { + printf("In func_c\n"); + func_d(); +} + +static void __attribute__((noinline)) func_b() { + printf("In func_b\n"); + func_c(); +} + +static void __attribute__((noinline)) func_a() { + printf("In func_a\n"); + func_b(); +} + +int main(int argc, char *argv[]) { + try { + printf("In main, about to call func_a\n"); + func_a(); + printf("ERROR: Should not reach here\n"); + return 1; + } catch (const std::exception &e) { + printf("Caught exception in main: %s\n", e.what()); + return 0; + } +} From 95f2728b5cdaf03c5f0c13983903f8e7b50b22b4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:47:28 -0800 Subject: [PATCH 21/32] DAG: Stop using TargetLibraryInfo for multi-result FP intrinsic codegen (#166987) Only use RuntimeLibcallsInfo. Remove the helper functions used to transition. --- llvm/include/llvm/CodeGen/SelectionDAG.h | 10 +--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 3 +- .../SelectionDAG/LegalizeVectorOps.cpp | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 60 ++++--------------- 5 files changed, 16 insertions(+), 65 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 62d2f222110e4..5b331e4444915 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1725,17 +1725,9 @@ class SelectionDAG { /// value. LLVM_ABI bool expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node, - SmallVectorImpl &Results, EVT CallType, + SmallVectorImpl &Results, std::optional CallRetResNo = {}); - // FIXME: Ths should be removed, and form using RTLIB::Libcall should be - // preferred. Callers should resolve the exact type libcall to use. - LLVM_ABI bool - expandMultipleResultFPLibCall(StringRef LibcallName, CallingConv::ID CC, - SDNode *Node, SmallVectorImpl &Results, - std::optional CallRetResNo = {}, - bool IsVectorMasked = false); - /// Expand the specified \c ISD::VAARG node as the Legalize pass would. LLVM_ABI SDValue expandVAArg(SDNode *Node); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a0baf821698a8..3ed84af6a8717 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4842,7 +4842,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT); + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); if (!Expanded) { DAG.getContext()->emitError(Twine("no libcall available for ") + Node->getOperationName(&DAG)); @@ -4940,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? 
RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 29c4dac12a81a..58983cb57d7f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1726,8 +1726,7 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0), - CallRetResNo); + DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index a7ae794459331..c55e55df373e9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1275,7 +1275,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) + DAG.expandMultipleResultFPLibCall(LC, Node, Results)) return; // TODO: Try to see if there's a narrower call available to use before @@ -1286,7 +1286,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, + DAG.expandMultipleResultFPLibCall(LC, Node, Results, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b5d502b90c90c..f05266967fb68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2514,56 +2514,14 @@ static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, bool SelectionDAG::expandMultipleResultFPLibCall( RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl &Results, - EVT CallVT, std::optional CallRetResNo) { + std::optional CallRetResNo) { if (LC == RTLIB::UNKNOWN_LIBCALL) return false; - EVT VT = Node->getValueType(0); - - RTLIB::LibcallImpl Impl = TLI->getLibcallImpl(LC); - if (Impl == RTLIB::Unsupported) - return false; - - StringRef LCName = TLI->getLibcallImplName(Impl); - - // FIXME: This should not use TargetLibraryInfo. There should be - // RTLIB::Libcall entries for each used vector type, and directly matched. - auto getVecDesc = [&]() -> VecDesc const * { - for (bool Masked : {false, true}) { - if (VecDesc const *VD = getLibInfo().getVectorMappingInfo( - LCName, VT.getVectorElementCount(), Masked)) { - return VD; - } - } - return nullptr; - }; - - // For vector types, we must find a vector mapping for the libcall. 
- VecDesc const *VD = nullptr; - if (VT.isVector() && !CallVT.isVector() && !(VD = getVecDesc())) + RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LC); + if (LibcallImpl == RTLIB::Unsupported) return false; - bool IsMasked = (VD && VD->isMasked()) || - RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl); - - // This wrapper function exists because getVectorMappingInfo works in terms of - // function names instead of RTLIB enums. - - // FIXME: If we used a vector mapping, this assumes the calling convention of - // the vector function is the same as the scalar. - - StringRef Name = VD ? VD->getVectorFnName() : LCName; - - return expandMultipleResultFPLibCall(Name, - TLI->getLibcallImplCallingConv(Impl), - Node, Results, CallRetResNo, IsMasked); -} - -// FIXME: This belongs in TargetLowering -bool SelectionDAG::expandMultipleResultFPLibCall( - StringRef Name, CallingConv::ID CC, SDNode *Node, - SmallVectorImpl &Results, std::optional CallRetResNo, - bool IsMasked) { LLVMContext &Ctx = *getContext(); EVT VT = Node->getValueType(0); unsigned NumResults = Node->getNumValues(); @@ -2624,8 +2582,8 @@ bool SelectionDAG::expandMultipleResultFPLibCall( SDLoc DL(Node); - // Pass the vector mask (if required). - if (IsMasked) { + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + // Pass the vector mask (if required). EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); @@ -2636,10 +2594,12 @@ bool SelectionDAG::expandMultipleResultFPLibCall( : Type::getVoidTy(Ctx); SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); SDValue Callee = - getExternalSymbol(Name.data(), TLI->getPointerTy(getDataLayout())); + getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), + TLI->getPointerTy(getDataLayout())); TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(CC, RetType, Callee, - std::move(Args)); + CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( + TLI->getLibcallImplCallingConv(LibcallImpl), RetType, Callee, + std::move(Args)); auto [Call, CallChain] = TLI->LowerCallTo(CLI); From 389a23c538e33b30903b4e733f5d30f629199829 Mon Sep 17 00:00:00 2001 From: William Tran-Viet Date: Tue, 11 Nov 2025 22:00:08 -0500 Subject: [PATCH 22/32] [libc++] Implement P2988R12: `std::optional` (#155202) Resolves #148131 - Unlock `std::optional` implementation - Allow instantiations of `optional` and `optional` but disables `value_or()` and `optional::iterator` + all `iterator` related functions - Update documentation - Update tests --- libcxx/docs/FeatureTestMacroTable.rst | 2 + libcxx/docs/ReleaseNotes/22.rst | 1 + libcxx/docs/Status/Cxx2cPapers.csv | 2 +- libcxx/include/__iterator/wrap_iter.h | 4 +- libcxx/include/optional | 326 ++++++++++++------ libcxx/include/version | 5 +- libcxx/modules/std/optional.inc | 3 +- .../iterator.compile.pass.cpp | 9 +- .../value_or.compile.pass.cpp | 28 ++ .../optional.version.compile.pass.cpp | 4 +- .../version.version.compile.pass.cpp | 4 +- .../optional/optional.iterator/begin.pass.cpp | 7 +- .../borrowed_range.compile.pass.cpp | 34 ++ .../optional/optional.iterator/end.pass.cpp | 8 +- .../optional.iterator/iterator.pass.cpp | 40 ++- .../optional.monadic/and_then.pass.cpp | 87 +++++ .../optional.monadic/or_else.pass.cpp | 26 ++ .../optional.monadic/transform.pass.cpp | 131 ++++++- .../assign_value.pass.cpp | 56 ++- .../optional.object.assign/emplace.pass.cpp | 27 +- 
.../optional.object.ctor/ctor.verify.cpp | 24 +- .../optional.object.ctor/move.pass.cpp | 124 +++---- .../ref_constructs_from_temporary.verify.cpp | 35 ++ .../optional.object.ctor/ref_t.pass.cpp | 75 ++++ .../optional.object.dtor/dtor.pass.cpp | 22 +- .../optional.object.mod/reset.pass.cpp | 13 +- .../dereference.pass.cpp | 14 +- .../dereference_const.pass.cpp | 19 + .../has_value.pass.cpp | 9 +- .../optional.object.observe/op_arrow.pass.cpp | 30 +- .../op_arrow_const.pass.cpp | 19 + .../optional.object.observe/value.pass.cpp | 8 + .../optional.object.observe/value_or.pass.cpp | 8 + .../value_or_const.pass.cpp | 10 +- .../optional.object.swap/swap.pass.cpp | 77 ++++- ...al_requires_destructible_object.verify.cpp | 12 +- .../optional/optional.object/types.pass.cpp | 9 +- .../optional.specalg/make_optional.pass.cpp | 4 +- .../make_optional_explicit.pass.cpp | 29 +- .../optional/optional.specalg/swap.pass.cpp | 76 +++- .../generate_feature_test_macro_components.py | 1 + 41 files changed, 1176 insertions(+), 246 deletions(-) create mode 100644 libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp create mode 100644 libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp create mode 100644 libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp create mode 100644 libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index d5ed9188b1b23..756bdf71f8b22 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -486,6 +486,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_not_fn`` ``202306L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_optional`` ``202506L`` + ---------------------------------------------------------- ----------------- ``__cpp_lib_optional_range_support`` ``202406L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_out_ptr`` ``202311L`` diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst index a6a0ac8670fb5..2c19dfc57a3f8 100644 --- a/libcxx/docs/ReleaseNotes/22.rst +++ b/libcxx/docs/ReleaseNotes/22.rst @@ -40,6 +40,7 @@ Implemented Papers - P2321R2: ``zip`` (`Github `__) (The paper is partially implemented. 
``zip_transform_view`` is implemented in this release) +- P2988R12: ``std::optional`` (`Github `__) - P3044R2: sub-``string_view`` from ``string`` (`Github `__) - P3223R2: Making ``std::istream::ignore`` less surprising (`Github `__) - P3060R3: Add ``std::views::indices(n)`` (`Github `__) diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index 0f4dbb882088a..0455643446f8e 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -122,7 +122,7 @@ "`P3293R3 `__","Splicing a base class subobject","2025-06 (Sofia)","","","`#148125 `__","" "`P3491R3 `__","``define_static_{string,object,array}``","2025-06 (Sofia)","","","`#148126 `__","" "`P3096R12 `__","Function Parameter Reflection in Reflection for C++26","2025-06 (Sofia)","","","`#148127 `__","" -"`P2988R12 `__","``std::optional``","2025-06 (Sofia)","","","`#148131 `__","" +"`P2988R12 `__","``std::optional``","2025-06 (Sofia)","|Complete|","22","`#148131 `__","" "`P3348R4 `__","C++26 should refer to C23 not C17","2025-06 (Sofia)","","","`#148133 `__","" "`P3037R6 `__","``constexpr`` ``std::shared_ptr`` and friends","2025-06 (Sofia)","","","`#148135 `__","" "`P3284R4 `__","``write_env`` and ``unstoppable`` Sender Adaptors","2025-06 (Sofia)","","","`#148136 `__","" diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h index d18d9682da449..98745f600a6ec 100644 --- a/libcxx/include/__iterator/wrap_iter.h +++ b/libcxx/include/__iterator/wrap_iter.h @@ -117,8 +117,8 @@ class __wrap_iter { friend class span; template friend struct array; - template - friend class optional; + template + friend struct __optional_iterator; }; template diff --git a/libcxx/include/optional b/libcxx/include/optional index a3023622e2067..ad672f6a9914f 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -210,6 +210,7 @@ namespace std { # include <__iterator/wrap_iter.h> # include <__memory/addressof.h> # include <__memory/construct_at.h> +# include <__ranges/enable_borrowed_range.h> # include <__ranges/enable_view.h> # include <__tuple/sfinae_helpers.h> # include <__type_traits/add_pointer.h> @@ -239,6 +240,7 @@ namespace std { # include <__type_traits/is_trivially_relocatable.h> # include <__type_traits/is_unbounded_array.h> # include <__type_traits/negation.h> +# include <__type_traits/reference_constructs_from_temporary.h> # include <__type_traits/remove_const.h> # include <__type_traits/remove_cv.h> # include <__type_traits/remove_cvref.h> @@ -409,39 +411,30 @@ struct __optional_storage_base : __optional_destruct_base<_Tp> { __construct(std::forward<_That>(__opt).__get()); } } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_from_val(_Up&& __val) { + this->__get() = std::forward<_Up>(__val); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __swap(__optional_storage_base& __rhs) { + using std::swap; + swap(this->__get(), __rhs.__get()); + } }; -// optional is currently required to be ill-formed. However, it may -// be allowed in the future. For this reason, it has already been implemented -// to ensure we can make the change in an ABI-compatible manner. 
template struct __optional_storage_base<_Tp, true> { using value_type = _Tp; using __raw_type _LIBCPP_NODEBUG = remove_reference_t<_Tp>; __raw_type* __value_; - template - static _LIBCPP_HIDE_FROM_ABI constexpr bool __can_bind_reference() { - using _RawUp = __libcpp_remove_reference_t<_Up>; - using _UpPtr = _RawUp*; - using _RawTp = __libcpp_remove_reference_t<_Tp>; - using _TpPtr = _RawTp*; - using _CheckLValueArg = - integral_constant::value && is_convertible<_UpPtr, _TpPtr>::value) || - is_same<_RawUp, reference_wrapper<_RawTp>>::value || - is_same<_RawUp, reference_wrapper<__remove_const_t<_RawTp>>>::value >; - return (is_lvalue_reference<_Tp>::value && _CheckLValueArg::value) || - (is_rvalue_reference<_Tp>::value && !is_lvalue_reference<_Up>::value && - is_convertible<_UpPtr, _TpPtr>::value); - } - _LIBCPP_HIDE_FROM_ABI constexpr __optional_storage_base() noexcept : __value_(nullptr) {} template _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_storage_base(in_place_t, _UArg&& __uarg) : __value_(std::addressof(__uarg)) { - static_assert(__can_bind_reference<_UArg>(), + static_assert(!__reference_constructs_from_temporary_v<_Tp, _UArg>, "Attempted to construct a reference element in tuple from a " "possible temporary"); } @@ -457,7 +450,7 @@ struct __optional_storage_base<_Tp, true> { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __construct(_UArg&& __val) { _LIBCPP_ASSERT_INTERNAL(!has_value(), "__construct called for engaged __optional_storage"); - static_assert(__can_bind_reference<_UArg>(), + static_assert(!__reference_constructs_from_temporary_v<_Tp, _UArg>, "Attempted to construct a reference element in tuple from a " "possible temporary"); __value_ = std::addressof(__val); @@ -481,6 +474,15 @@ struct __optional_storage_base<_Tp, true> { __construct(std::forward<_That>(__opt).__get()); } } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_from_val(_Up&& __val) noexcept { + __value_ = std::addressof(__val); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __swap(__optional_storage_base& __rhs) noexcept { + std::swap(__value_, __rhs.__value_); + } }; template ::value> @@ -592,6 +594,10 @@ constexpr bool ranges::enable_view> = true; template constexpr range_format format_kind> = range_format::disabled; + +template +constexpr bool ranges::enable_borrowed_range> = true; + # endif # if _LIBCPP_STD_VER >= 20 @@ -606,19 +612,19 @@ struct __is_std_optional : false_type {}; template struct __is_std_optional> : true_type {}; -template -class _LIBCPP_DECLSPEC_EMPTY_BASES optional - : private __optional_move_assign_base<_Tp>, - private __optional_sfinae_ctor_base_t<_Tp>, - private __optional_sfinae_assign_base_t<_Tp> { - using __base _LIBCPP_NODEBUG = __optional_move_assign_base<_Tp>; +template +struct __optional_iterator {}; - using __pointer _LIBCPP_NODEBUG = std::add_pointer_t<_Tp>; - using __const_pointer _LIBCPP_NODEBUG = std::add_pointer_t; +template +struct __optional_iterator< + _Tp, + enable_if_t && is_function_v<__libcpp_remove_reference_t<_Tp>>) && + !(is_lvalue_reference_v<_Tp> && is_array_v<__libcpp_remove_reference_t<_Tp>>)> > { +private: + using __pointer _LIBCPP_NODEBUG = add_pointer_t>; + using __const_pointer _LIBCPP_NODEBUG = add_pointer_t>; public: - using value_type = _Tp; - # if _LIBCPP_STD_VER >= 26 # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL using iterator = __bounded_iter<__wrap_iter<__pointer>>; @@ -627,19 +633,86 @@ public: using iterator = __wrap_iter<__pointer>; using const_iterator = 
__wrap_iter<__const_pointer>; # endif + + // [optional.iterators], iterator support + _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() noexcept { + auto& __derived_self = static_cast&>(*this); + auto __ptr = [&__derived_self]() { + if constexpr (is_lvalue_reference_v<_Tp>) { + return __derived_self.has_value() ? std::addressof(__derived_self.__get()) : nullptr; + } + return std::addressof(__derived_self.__get()); + }(); + +# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL + return std::__make_bounded_iter( + __wrap_iter<__pointer>(__ptr), + __wrap_iter<__pointer>(__ptr), + __wrap_iter<__pointer>(__ptr) + (__derived_self.has_value() ? 1 : 0)); +# else + return iterator(__ptr); +# endif + } + + _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { + auto& __derived_self = static_cast&>(*this); + auto* __ptr = [&__derived_self]() { + if constexpr (is_lvalue_reference_v<_Tp>) { + return __derived_self.has_value() ? std::addressof(__derived_self.__get()) : nullptr; + } + return std::addressof(__derived_self.__get()); + }(); + +# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL + return std::__make_bounded_iter( + __wrap_iter<__const_pointer>(__ptr), + __wrap_iter<__const_pointer>(__ptr), + __wrap_iter<__const_pointer>(__ptr) + (__derived_self.has_value() ? 1 : 0)); +# else + return const_iterator(__ptr); +# endif + } + + _LIBCPP_HIDE_FROM_ABI constexpr iterator end() noexcept { + return begin() + (static_cast&>(*this).has_value() ? 1 : 0); + } + _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { + return begin() + (static_cast&>(*this).has_value() ? 1 : 0); + } # endif +}; + +template +class _LIBCPP_DECLSPEC_EMPTY_BASES optional + : private __optional_move_assign_base<_Tp>, + private __optional_sfinae_ctor_base_t<_Tp>, + private __optional_sfinae_assign_base_t<_Tp>, + public __optional_iterator<_Tp> { + using __base _LIBCPP_NODEBUG = __optional_move_assign_base<_Tp>; + +public: + using value_type = __libcpp_remove_reference_t<_Tp>; + using __trivially_relocatable _LIBCPP_NODEBUG = conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value, optional, void>; private: - // Disable the reference extension using this static assert. 
- static_assert(!is_same_v<__remove_cvref_t, in_place_t>, + static_assert(!is_same_v<__remove_cvref_t<_Tp>, in_place_t>, "instantiation of optional with in_place_t is ill-formed"); - static_assert(!is_same_v<__remove_cvref_t, nullopt_t>, - "instantiation of optional with nullopt_t is ill-formed"); - static_assert(!is_reference_v, "instantiation of optional with a reference type is ill-formed"); - static_assert(is_destructible_v, "instantiation of optional with a non-destructible type is ill-formed"); - static_assert(!is_array_v, "instantiation of optional with an array type is ill-formed"); + static_assert(!is_same_v<__remove_cvref_t<_Tp>, nullopt_t>, "instantiation of optional with nullopt_t is ill-formed"); +# if _LIBCPP_STD_VER >= 26 + static_assert(!is_rvalue_reference_v<_Tp>, "instantiation of optional with an rvalue reference type is ill-formed"); +# else + static_assert(!is_reference_v<_Tp>, "instantiation of optional with a reference type is ill-formed"); +# endif + static_assert(is_destructible_v<_Tp>, "instantiation of optional with a non-destructible type is ill-formed"); + static_assert(!is_array_v<_Tp>, "instantiation of optional with an array type is ill-formed"); + +# if _LIBCPP_STD_VER >= 26 + template + constexpr static bool __libcpp_opt_ref_ctor_deleted = + is_lvalue_reference_v<_Tp> && reference_constructs_from_temporary_v<_Tp, _Up>; +# endif // LWG2756: conditionally explicit conversion from _Up struct _CheckOptionalArgsConstructor { @@ -714,18 +787,15 @@ public: template , is_constructible>::value, int> = 0> + enable_if_t<_And<_IsSame<_InPlaceT, in_place_t>, is_constructible<_Tp, _Args...>>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_InPlaceT, _Args&&... __args) : __base(in_place, std::forward<_Args>(__args)...) {} - template &, _Args...>, int> = 0> + template &, _Args...>, int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(in_place_t, initializer_list<_Up> __il, _Args&&... __args) : __base(in_place, __il, std::forward<_Args>(__args)...) {} - template ::template __enable_implicit<_Up>(), int> = 0> + template ::template __enable_implicit<_Up>(), int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr optional(_Up&& __v) : __base(in_place, std::forward<_Up>(__v)) {} template , @@ -752,6 +822,38 @@ public: this->__construct_from(std::move(__v)); } + // deleted optional constructors +# if _LIBCPP_STD_VER >= 26 + template &, _Args...>, int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(in_place_t, initializer_list<_Up>, _Args&&...) 
= delete; + + template ::template __enable_implicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + optional(_Up&&) = delete; + + template , + enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_explicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(_Up&&) = delete; + + template ::template __enable_implicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + optional(const optional<_Up>&) = delete; + + template ::template __enable_explicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(const optional<_Up>&) = delete; + + template ::template __enable_implicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + optional(optional<_Up>&&) = delete; + + template ::template __enable_explicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(optional<_Up>&&) = delete; +# endif + # if _LIBCPP_STD_VER >= 23 template , + template , enable_if_t<_And<_IsNotSame<__remove_cvref_t<_Up>, optional>, - _Or<_IsNotSame<__remove_cvref_t<_Up>, value_type>, _Not>>, - is_constructible, - is_assignable>::value, + _Or<_IsNotSame<__remove_cvref_t<_Up>, _Tp>, _Not>>, + is_constructible<_Tp, _Up>, + is_assignable<_Tp&, _Up>>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional& operator=(_Up&& __v) { if (this->has_value()) - this->__get() = std::forward<_Up>(__v); + this->__assign_from_val(std::forward<_Up>(__v)); else this->__construct(std::forward<_Up>(__v)); return *this; @@ -798,7 +900,7 @@ public: return *this; } - template , int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& emplace(_Args&&... __args) { reset(); this->__construct(std::forward<_Args>(__args)...); @@ -807,7 +909,12 @@ public: template &, _Args...>, int> = 0> + enable_if_t&, _Args...> +# if _LIBCPP_STD_VER >= 26 + && !reference_constructs_from_temporary_v<_Tp&, _Up> +# endif + , + int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& emplace(initializer_list<_Up> __il, _Args&&... __args) { reset(); this->__construct(__il, std::forward<_Args>(__args)...); @@ -815,11 +922,10 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void - swap(optional& __opt) noexcept(is_nothrow_move_constructible_v && is_nothrow_swappable_v) { + swap(optional& __opt) noexcept(is_nothrow_move_constructible_v<_Tp> && is_nothrow_swappable_v<_Tp>) { if (this->has_value() == __opt.has_value()) { - using std::swap; if (this->has_value()) - swap(this->__get(), __opt.__get()); + this->__swap(__opt); } else { if (this->has_value()) { __opt.__construct(std::move(this->__get())); @@ -831,60 +937,32 @@ public: } } -# if _LIBCPP_STD_VER >= 26 - // [optional.iterators], iterator support - _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() noexcept { -# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL - return std::__make_bounded_iter( - std::__wrap_iter<__pointer>(std::addressof(this->__get())), - std::__wrap_iter<__pointer>(std::addressof(this->__get())), - std::__wrap_iter<__pointer>(std::addressof(this->__get()) + (this->has_value() ? 
1 : 0))); -# else - return iterator(std::addressof(this->__get())); -# endif - } - - _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { -# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL - return std::__make_bounded_iter( - std::__wrap_iter<__const_pointer>(std::addressof(this->__get())), - std::__wrap_iter<__const_pointer>(std::addressof(this->__get())), - std::__wrap_iter<__const_pointer>(std::addressof(this->__get()) + (this->has_value() ? 1 : 0))); -# else - return const_iterator(std::addressof(this->__get())); -# endif - } - - _LIBCPP_HIDE_FROM_ABI constexpr iterator end() noexcept { return begin() + (this->has_value() ? 1 : 0); } - _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { return begin() + (this->has_value() ? 1 : 0); } -# endif - - _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t operator->() const noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp const> operator->() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator-> called on a disengaged value"); return std::addressof(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t operator->() noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp> operator->() noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator-> called on a disengaged value"); return std::addressof(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr const value_type& operator*() const& noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type& operator*() & noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() & noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type&& operator*() && noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator*() && noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return std::move(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr const value_type&& operator*() const&& noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& operator*() const&& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return std::move(this->__get()); } @@ -894,48 +972,66 @@ public: using __base::__get; using __base::has_value; - _LIBCPP_HIDE_FROM_ABI constexpr value_type const& value() const& { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp const& value() const& { if (!this->has_value()) std::__throw_bad_optional_access(); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type& value() & { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() & { if (!this->has_value()) std::__throw_bad_optional_access(); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type&& value() && { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& value() && { if (!this->has_value()) std::__throw_bad_optional_access(); return std::move(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type const&& value() const&& { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp const&& value() const&& { if (!this->has_value()) std::__throw_bad_optional_access(); return std::move(this->__get()); } template > - _LIBCPP_HIDE_FROM_ABI constexpr value_type value_or(_Up&& __v) const& { - 
static_assert(is_copy_constructible_v, "optional::value_or: T must be copy constructible"); - static_assert(is_convertible_v<_Up, value_type>, "optional::value_or: U must be convertible to T"); - return this->has_value() ? this->__get() : static_cast(std::forward<_Up>(__v)); +# if _LIBCPP_STD_VER >= 26 + requires(!(is_lvalue_reference_v<_Tp> && is_function_v<__libcpp_remove_reference_t<_Tp>>) && + !(is_lvalue_reference_v<_Tp> && is_array_v<__libcpp_remove_reference_t<_Tp>>)) +# endif + _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) const& { + static_assert(is_copy_constructible_v<_Tp>, "optional::value_or: T must be copy constructible"); + static_assert(is_convertible_v<_Up, _Tp>, "optional::value_or: U must be convertible to T"); + return this->has_value() ? this->__get() : static_cast<_Tp>(std::forward<_Up>(__v)); } template > - _LIBCPP_HIDE_FROM_ABI constexpr value_type value_or(_Up&& __v) && { - static_assert(is_move_constructible_v, "optional::value_or: T must be move constructible"); - static_assert(is_convertible_v<_Up, value_type>, "optional::value_or: U must be convertible to T"); - return this->has_value() ? std::move(this->__get()) : static_cast(std::forward<_Up>(__v)); +# if _LIBCPP_STD_VER >= 26 + requires(!is_lvalue_reference_v<_Tp>) +# endif + _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && { + static_assert(is_move_constructible_v<_Tp>, "optional::value_or: T must be move constructible"); + static_assert(is_convertible_v<_Up, _Tp>, "optional::value_or: U must be convertible to T"); + return this->has_value() ? std::move(this->__get()) : static_cast<_Tp>(std::forward<_Up>(__v)); + } + +# if _LIBCPP_STD_VER >= 26 + template > + requires(is_lvalue_reference_v<_Tp> && + !(is_function_v<__libcpp_remove_reference_t<_Tp>> || is_array_v<__libcpp_remove_reference_t<_Tp>>)) + _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && { + static_assert(is_move_constructible_v<_Tp>, "optional::value_or: T must be move constructible"); + static_assert(is_convertible_v<_Up, _Tp>, "optional::value_or: U must be convertible to T"); + return this->has_value() ? 
this->__get() : static_cast<_Tp>(std::forward<_Up>(__v)); } +# endif # if _LIBCPP_STD_VER >= 23 template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & { - using _Up = invoke_result_t<_Func, value_type&>; + using _Up = invoke_result_t<_Func, _Tp&>; static_assert(__is_std_optional>::value, "Result of f(value()) must be a specialization of std::optional"); if (*this) @@ -945,7 +1041,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& { - using _Up = invoke_result_t<_Func, const value_type&>; + using _Up = invoke_result_t<_Func, const _Tp&>; static_assert(__is_std_optional>::value, "Result of f(value()) must be a specialization of std::optional"); if (*this) @@ -955,7 +1051,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && { - using _Up = invoke_result_t<_Func, value_type&&>; + using _Up = invoke_result_t<_Func, _Tp&&>; static_assert(__is_std_optional>::value, "Result of f(std::move(value())) must be a specialization of std::optional"); if (*this) @@ -965,7 +1061,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& { - using _Up = invoke_result_t<_Func, const value_type&&>; + using _Up = invoke_result_t<_Func, const _Tp&&>; static_assert(__is_std_optional>::value, "Result of f(std::move(value())) must be a specialization of std::optional"); if (*this) @@ -975,7 +1071,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & { - using _Up = remove_cv_t>; + using _Up = remove_cv_t>; static_assert(!is_array_v<_Up>, "Result of f(value()) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(value()) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(value()) should not be std::nullopt_t"); @@ -987,7 +1083,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& { - using _Up = remove_cv_t>; + using _Up = remove_cv_t>; static_assert(!is_array_v<_Up>, "Result of f(value()) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(value()) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(value()) should not be std::nullopt_t"); @@ -999,7 +1095,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && { - using _Up = remove_cv_t>; + using _Up = remove_cv_t>; static_assert(!is_array_v<_Up>, "Result of f(std::move(value())) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(std::move(value())) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(std::move(value())) should not be std::nullopt_t"); @@ -1011,7 +1107,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& { - using _Up = remove_cvref_t>; + using _Up = remove_cvref_t>; static_assert(!is_array_v<_Up>, "Result of f(std::move(value())) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(std::move(value())) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(std::move(value())) should not be std::nullopt_t"); @@ -1023,7 +1119,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr optional or_else(_Func&& __f) const& - requires is_copy_constructible_v + requires is_copy_constructible_v<_Tp> { static_assert(is_same_v>, optional>, "Result of f() should be the same type as this optional"); @@ -1034,7 +1130,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr 
optional or_else(_Func&& __f) && - requires is_move_constructible_v + requires is_move_constructible_v<_Tp> { static_assert(is_same_v>, optional>, "Result of f() should be the same type as this optional"); @@ -1336,7 +1432,15 @@ swap(optional<_Tp>& __x, optional<_Tp>& __y) noexcept(noexcept(__x.swap(__y))) { __x.swap(__y); } -template +struct __make_optional_barrier_tag { + explicit __make_optional_barrier_tag() = default; +}; + +template < +# if _LIBCPP_STD_VER >= 26 + __make_optional_barrier_tag = __make_optional_barrier_tag{}, +# endif + class _Tp> _LIBCPP_HIDE_FROM_ABI constexpr optional> make_optional(_Tp&& __v) { return optional>(std::forward<_Tp>(__v)); } diff --git a/libcxx/include/version b/libcxx/include/version index b0030602f854a..05532ea731ff3 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -187,7 +187,8 @@ __cpp_lib_nonmember_container_access 201411L __cpp_lib_not_fn 202306L 201603L // C++17 __cpp_lib_null_iterators 201304L -__cpp_lib_optional 202110L +__cpp_lib_optional 202506L + 202110L // C++23 202106L // C++20 201606L // C++17 __cpp_lib_optional_range_support 202406L @@ -594,6 +595,8 @@ __cpp_lib_void_t 201411L # define __cpp_lib_mdspan 202406L # undef __cpp_lib_not_fn # define __cpp_lib_not_fn 202306L +# undef __cpp_lib_optional +# define __cpp_lib_optional 202506L # define __cpp_lib_optional_range_support 202406L # undef __cpp_lib_out_ptr # define __cpp_lib_out_ptr 202311L diff --git a/libcxx/modules/std/optional.inc b/libcxx/modules/std/optional.inc index 9ee51117277ce..88de0bb4db12b 100644 --- a/libcxx/modules/std/optional.inc +++ b/libcxx/modules/std/optional.inc @@ -13,8 +13,9 @@ export namespace std { #if _LIBCPP_STD_VER >= 26 // [optional.iterators], iterator support namespace ranges { + using std::ranges::enable_borrowed_range; using std::ranges::enable_view; - } + } // namespace ranges #endif // [optional.nullopt], no-value state indicator using std::nullopt; diff --git a/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp index 3cdd7553e2e5d..b604579e43557 100644 --- a/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp +++ b/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp @@ -23,8 +23,7 @@ concept has_iterator_aliases = requires { static_assert(has_iterator_aliases>); static_assert(has_iterator_aliases>); - -// TODO: Uncomment these once P2988R12 is implemented, as they would be testing optional - -// static_assert(!has_iterator_aliases>); -// static_assert(!has_iterator_aliases>); +static_assert(has_iterator_aliases>); +static_assert(has_iterator_aliases>); +static_assert(!has_iterator_aliases>); +static_assert(!has_iterator_aliases>); diff --git a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp new file mode 100644 index 0000000000000..25df0dd6c1936 --- /dev/null +++ b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp @@ -0,0 +1,28 @@ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// template T optional::value_or(U&&); + +#include +#include + +template +concept has_value_or = requires(Opt opt, T&& t) { + { opt.value_or(t) } -> std::same_as; +}; + +static_assert(has_value_or, int>); +static_assert(has_value_or, int&>); +static_assert(has_value_or, const int&>); +static_assert(!has_value_or&&, int (&)[1]>); +static_assert(!has_value_or&&, int (&)()>); diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp index aca6290f5a4bf..c4e652979a4e6 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp @@ -142,8 +142,8 @@ # ifndef __cpp_lib_optional # error "__cpp_lib_optional should be defined in c++26" # endif -# if __cpp_lib_optional != 202110L -# error "__cpp_lib_optional should have the value 202110L in c++26" +# if __cpp_lib_optional != 202506L +# error "__cpp_lib_optional should have the value 202506L in c++26" # endif # ifndef __cpp_lib_optional_range_support diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 8189c5c4e5985..996ec29dce697 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -7509,8 +7509,8 @@ # ifndef __cpp_lib_optional # error "__cpp_lib_optional should be defined in c++26" # endif -# if __cpp_lib_optional != 202110L -# error "__cpp_lib_optional should have the value 202110L in c++26" +# if __cpp_lib_optional != 202506L +# error "__cpp_lib_optional should have the value 202506L in c++26" # endif # ifndef __cpp_lib_optional_range_support diff --git a/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp index df95a8df3793f..81234525923a1 100644 --- a/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp @@ -21,7 +21,8 @@ template constexpr bool test() { - std::optional opt{T{}}; + std::remove_reference_t t = std::remove_reference_t{}; + std::optional opt{t}; { // begin() is marked noexcept static_assert(noexcept(opt.begin())); @@ -53,6 +54,10 @@ constexpr bool tests() { assert(test()); assert(test()); assert(test()); + assert(test()); + assert(test()); + assert(test()); + assert(test()); return true; } diff --git a/libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp new file mode 100644 index 0000000000000..a79d1d51a5b11 --- /dev/null +++ b/libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// template class optional::iterator; +// template class optional::const_iterator; +// template +// constexpr bool ranges::enable_borrowed_range> = true; + +#include +#include +#include + +template +void borrowed_range() { + static_assert(std::ranges::enable_borrowed_range>); + static_assert(std::ranges::range> == std::ranges::borrowed_range>); +} + +void test_borrowed_range() { + borrowed_range(); + borrowed_range(); + borrowed_range(); + borrowed_range(); + borrowed_range(); +} diff --git a/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp index 966c3e7441880..c62c9fc7746d6 100644 --- a/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include template @@ -41,7 +42,8 @@ constexpr bool test() { assert(it2 == std::as_const(disengaged).end()); } - std::optional engaged{T{}}; + std::remove_reference_t t = std::remove_reference_t{}; + std::optional engaged{t}; { // end() != begin() if the optional is engaged auto it = engaged.end(); @@ -62,6 +64,10 @@ constexpr bool tests() { assert(test()); assert(test()); assert(test()); + assert(test()); + assert(test()); + assert(test()); + assert(test()); return true; } diff --git a/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp index 1203290a0290a..671fac35e732a 100644 --- a/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp @@ -14,15 +14,23 @@ // template class optional::const_iterator; #include -#include #include #include #include #include -template +template +constexpr bool test_range_concept() { + return std::ranges::range>; +} + +template __val> constexpr bool test() { - std::optional opt{__val}; + std::remove_reference_t v{__val}; + std::optional opt{v}; + { + assert(test_range_concept()); + } { // Dereferencing an iterator of an engaged optional will return the same value that the optional holds. auto it = opt.begin(); @@ -41,13 +49,14 @@ constexpr bool test() { assert(std::random_access_iterator); } - { // const_iterator::value_type == std::remove_cv_t, const_iterator::reference == const T&, iterator::value_type = std::remove_cv_t, iterator::reference == T& + { // const_iterator::value_type == std::remove_cvref_t, const_iterator::reference == const T&, iterator::value_type = std::remove_cvref_t, iterator::reference == T& + // std::remove_cv_t is impossible for optional auto it = opt.begin(); auto it2 = std::as_const(opt).begin(); - assert((std::is_same_v>)); - assert((std::is_same_v)); - assert((std::is_same_v>)); - assert((std::is_same_v)); + assert((std::is_same_v>)); + assert((std::is_same_v&>)); + assert((std::is_same_v>)); + assert((std::is_same_v&>)); } { // std::ranges::size for an engaged optional == 1, disengaged optional == 0 @@ -68,13 +77,13 @@ constexpr bool test() { // An optional with value that is reset will have a begin() == end(), then when it is reassigned a value, // begin() != end(), and *begin() will contain the new value. 
{ - std::optional val{__val}; + std::optional val{v}; assert(val.begin() != val.end()); val.reset(); assert(val.begin() == val.end()); - val.emplace(__val); + val.emplace(v); assert(val.begin() != val.end()); - assert(*(val.begin()) == __val); + assert(*(val.begin()) == v); } return true; @@ -86,6 +95,15 @@ constexpr bool tests() { assert((test())); assert((test())); assert((test())); + assert((test())); + assert((test())); + assert((test())); + assert((test())); + assert((test())); + + assert(!test_range_concept()); + assert(!test_range_concept()); + assert(!test_range_concept()); return true; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp index 97305d976e066..133eed4a606bb 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp @@ -16,6 +16,7 @@ // template constexpr auto and_then(F&&) const&&; #include +#include #include #include "test_macros.h" @@ -257,8 +258,94 @@ constexpr bool test() { return true; } +#if TEST_STD_VER >= 26 +constexpr bool test_ref() { + // Test & overload + { + // Without & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + std::same_as> decltype(auto) r = i.and_then(LVal{}); + + assert(r == 1); + assert(i.and_then(NOLVal{}) == std::nullopt); + } + + //With & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + RefQual l{}; + NORefQual nl{}; + std::same_as> decltype(auto) r = i.and_then(l); + + assert(r == 1); + assert(i.and_then(nl) == std::nullopt); + } + } + + // Test const& overload + { + // Without & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + std::same_as> decltype(auto) r = i.and_then(CLVal{}); + + assert(r == 1); + assert(i.and_then(NOCLVal{}) == std::nullopt); + } + + //With & qualifier on F's operator() + { + int j = 42; + const std::optional i{j}; + const CRefQual l{}; + const NOCRefQual nl{}; + std::same_as> decltype(auto) r = i.and_then(l); + + assert(r == 1); + assert(i.and_then(nl) == std::nullopt); + } + } + // Test && overload + { + //With & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + std::same_as> decltype(auto) r = i.and_then(RVRefQual{}); + + assert(r == 1); + assert(i.and_then(NORVRefQual{}) == std::nullopt); + } + } + + // Test const&& overload + { + //With & qualifier on F's operator() + { + int j = 42; + const std::optional i{j}; + const RVCRefQual l{}; + const NORVCRefQual nl{}; + std::same_as> decltype(auto) r = i.and_then(std::move(l)); + + assert(r == 1); + assert(i.and_then(std::move(nl)) == std::nullopt); + } + } + return true; +} +#endif + int main(int, char**) { test(); static_assert(test()); +#if TEST_STD_VER >= 26 + test_ref(); + static_assert(test_ref()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp index ccc94ab9be2cb..de0a67c1579ee 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp @@ -62,6 +62,32 @@ constexpr bool test() { return std::optional{}; }); } +#if TEST_STD_VER >= 26 + { + int i = 2; + std::optional opt; + assert(opt.or_else([&] { return std::optional{i}; }) == i); + int j = 3; + opt = j; + opt.or_else([] { + assert(false); + return std::optional{}; + }); + assert(opt == j); + } + { + int i = 
2; + std::optional opt; + assert(std::move(opt).or_else([&] { return std::optional{i}; }) == i); + int j = 3; + opt = j; + std::move(opt).or_else([] { + assert(false); + return std::optional{}; + }); + assert(opt == j); + } +#endif return true; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp index 0a151517b101c..ad2713f2ac5b8 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp @@ -17,62 +17,64 @@ #include "test_macros.h" #include +#include #include #include +#include struct LVal { constexpr int operator()(int&) { return 1; } - int operator()(const int&) = delete; - int operator()(int&&) = delete; + int operator()(const int&) = delete; + int operator()(int&&) = delete; int operator()(const int&&) = delete; }; struct CLVal { int operator()(int&) = delete; constexpr int operator()(const int&) { return 1; } - int operator()(int&&) = delete; + int operator()(int&&) = delete; int operator()(const int&&) = delete; }; struct RVal { - int operator()(int&) = delete; + int operator()(int&) = delete; int operator()(const int&) = delete; constexpr int operator()(int&&) { return 1; } int operator()(const int&&) = delete; }; struct CRVal { - int operator()(int&) = delete; + int operator()(int&) = delete; int operator()(const int&) = delete; - int operator()(int&&) = delete; + int operator()(int&&) = delete; constexpr int operator()(const int&&) { return 1; } }; struct RefQual { constexpr int operator()(int) & { return 1; } - int operator()(int) const& = delete; - int operator()(int) && = delete; + int operator()(int) const& = delete; + int operator()(int) && = delete; int operator()(int) const&& = delete; }; struct CRefQual { int operator()(int) & = delete; constexpr int operator()(int) const& { return 1; } - int operator()(int) && = delete; + int operator()(int) && = delete; int operator()(int) const&& = delete; }; struct RVRefQual { - int operator()(int) & = delete; + int operator()(int) & = delete; int operator()(int) const& = delete; constexpr int operator()(int) && { return 1; } int operator()(int) const&& = delete; }; struct RVCRefQual { - int operator()(int) & = delete; + int operator()(int) & = delete; int operator()(int) const& = delete; - int operator()(int) && = delete; + int operator()(int) && = delete; constexpr int operator()(int) const&& { return 1; } }; @@ -83,7 +85,7 @@ struct NoCopy { }; struct NoMove { - NoMove() = default; + NoMove() = default; NoMove(NoMove&&) = delete; NoMove operator()(const NoCopy&&) { return NoMove{}; } }; @@ -200,8 +202,111 @@ constexpr bool test() { return true; } +#if TEST_STD_VER >= 26 +constexpr bool test_ref() { + { + std::optional opt1; + std::same_as> decltype(auto) opt1r = opt1.transform([](int i) { return i + 2; }); + assert(!opt1); + assert(!opt1r); + } + + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o2 = opt.transform([](int j) { return j + 2; }); + + assert(*o2 == 44); + } + // Test & overload + { + // Without & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = opt.transform(LVal{}); + + assert(*o3 == 1); + } + + //With & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + RefQual l{}; + std::same_as> decltype(auto) o3 = opt.transform(l); + + assert(*o3 == 1); + } + } + // const& overload + { + // Without & qualifier on F's 
operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = std::as_const(opt).transform(CLVal{}); + + assert(*o3 == 1); + } + + //With & qualifier on F's operator() + { + int i = 42; + const std::optional opt{i}; + const CRefQual l{}; + std::same_as> decltype(auto) o3 = opt.transform(l); + + assert(*o3 == 1); + } + } + + // Test && overload + { + // Without & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = std::move(opt).transform(RVal{}); + + assert(*o3 == 1); + } + + //With & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = std::move(opt).transform(RVRefQual{}); + assert(*o3 == 1); + } + } + + // const&& overload + { + //With & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + const RVCRefQual rvc{}; + std::same_as> decltype(auto) o3 = opt.transform(std::move(rvc)); + assert(*o3 == 1); + } + } + { + std::optional o6 = std::nullopt; + auto o6r = o6.transform([](int) { return 42; }); + assert(!o6r); + } + return true; +} +#endif + int main(int, char**) { test(); static_assert(test()); +#if TEST_STD_VER >= 26 + test_ref(); + static_assert(test_ref()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp index eaca111b72dca..ddb9ffc4bf80c 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp @@ -250,6 +250,57 @@ constexpr T pr38638(T v) return *o + 2; } +#if TEST_STD_VER >= 26 + +template _Val> +constexpr void test_with_ref() { + T t{_Val}; + { // to empty + optional opt; + opt = t; + assert(static_cast(opt) == true); + assert(*opt == t); + } + { // to existing + optional opt{t}; + opt = t; + assert(static_cast(opt) == true); + assert(*opt == t); + } + { // test default argument + optional opt; + opt = {t}; + assert(static_cast(opt) == true); + assert(*opt == t); + } + { // test default argument + optional opt{t}; + opt = {}; + assert(static_cast(opt) == false); + } + // test two objects, make sure that the optional only changes what it holds a reference to + { + T t2{_Val}; + optional opt{t}; + opt = t2; + + assert(std::addressof(*opt) != std::addressof(t)); + assert(std::addressof(*opt) == std::addressof(t2)); + } + // test that reassigning the reference for an optional doesn't affect the objet it's holding a reference to + { + int i = -1; + int j = 2; + optional opt{i}; + opt = j; + + assert(i == -1); + assert(std::addressof(*opt) != std::addressof(i)); + assert(std::addressof(*opt) == std::addressof(j)); + assert(*opt == 2); + } +} +#endif int main(int, char**) { @@ -281,5 +332,8 @@ int main(int, char**) static_assert(pr38638(3) == 5, ""); - return 0; +#if TEST_STD_VER >= 26 + test_with_ref(); +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp index 245d8ff3d2146..629e315add4d9 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp @@ -221,6 +221,24 @@ TEST_CONSTEXPR_CXX20 bool 
test_empty_emplace() { return true; } +#if TEST_STD_VER >= 26 +template _Val> +constexpr bool test_ref() { + using Opt = std::optional; + T t{_Val}; + { + Opt opt; + auto& v = opt.emplace(t); + static_assert(std::is_same_v); + assert(static_cast(opt) == true); + assert(*opt == t); + assert(&v == &*opt); + assert(&t == &*opt); + } + return true; +} +#endif + int main(int, char**) { { @@ -291,6 +309,11 @@ int main(int, char**) } } #endif - - return 0; +#if TEST_STD_VER >= 26 + static_assert(test_ref()); + static_assert(test_ref()); + assert((test_ref())); + assert((test_ref())); +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp index 775d2bde7d13d..c5281783d4350 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp @@ -23,18 +23,26 @@ struct NonDestructible { ~NonDestructible() = delete; }; int main(int, char**) { - { - std::optional o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} - std::optional o2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a non-destructible type is ill-formed}} - std::optional o3; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an array type is ill-formed}} - } - - { + { +#if TEST_STD_VER >= 26 + std::optional + opt2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an rvalue reference type is ill-formed}} +#else + std::optional + o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} +#endif + std::optional + o2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a non-destructible type is ill-formed}} + std::optional + o3; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an array type is ill-formed}} + } + + { std::optional< std::in_place_t> o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} std::optional o2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} std::optional< volatile std::in_place_t> o3; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} std::optional o4; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} - } + } { std::optional< std::nullopt_t> o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with nullopt_t is ill-formed}} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp index f856c1d41d05a..f59fc3b82ad7f 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp @@ -78,71 +78,71 @@ void test_ref(InitArgs&&... 
args) assert(&(*lhs) == &(*rhs)); } -void test_reference_extension() -{ -#if defined(_LIBCPP_VERSION) && 0 // FIXME these extensions are currently disabled. - using T = TestTypes::TestType; - T::reset(); - { - T t; - T::reset_constructors(); - test_ref(); - test_ref(t); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::destroyed == 1); - assert(T::alive == 0); - { - T t; - const T& ct = t; - T::reset_constructors(); - test_ref(); - test_ref(t); - test_ref(ct); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::alive == 0); - assert(T::destroyed == 1); - { - T t; - T::reset_constructors(); - test_ref(); - test_ref(std::move(t)); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::alive == 0); - assert(T::destroyed == 1); - { - T t; - const T& ct = t; - T::reset_constructors(); - test_ref(); - test_ref(std::move(t)); - test_ref(std::move(ct)); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::alive == 0); - assert(T::destroyed == 1); - { - static_assert(!std::is_copy_constructible>::value, ""); - static_assert(!std::is_copy_constructible>::value, ""); - } +void test_reference_extension() { +#if TEST_STD_VER >= 26 + using T = TestTypes::TestType; + T::reset(); + { + T t; + T::reset_constructors(); + test_ref(); + test_ref(t); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::destroyed == 1); + assert(T::alive == 0); + { + T t; + const T& ct = t; + T::reset_constructors(); + test_ref(); + test_ref(t); + test_ref(ct); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::alive == 0); + assert(T::destroyed == 1); +# if 0 // FIXME: optional is not allowed. + { + T t; + T::reset_constructors(); + test_ref(); + test_ref(std::move(t)); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::alive == 0); + assert(T::destroyed == 1); + { + T t; + const T& ct = t; + T::reset_constructors(); + test_ref(); + test_ref(std::move(t)); + test_ref(std::move(ct)); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::alive == 0); + assert(T::destroyed == 1); + { + static_assert(!std::is_copy_constructible_v>); + static_assert(!std::is_copy_constructible_v>); + } +# endif #endif } - int main(int, char**) { test(); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp new file mode 100644 index 0000000000000..01b241ffbe79b --- /dev/null +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// optional + +#include +#include + +struct X { + int i; + + X(int j) : i(j) {} +}; + +int main(int, char**) { + const std::optional _co(1); + std::optional _o(1); + + // expected-error-re@*:* 8 {{call to deleted constructor of 'std::optional<{{.*}}>'}} + std::optional o1{1}; // optional(U&&) + std::optional o2{std::optional(1)}; // optional(optional&&) + std::optional o3{_co}; // optional(const optional&) + std::optional o4{_o}; // optional(optional&) + std::optional o5{1}; // optional(U&&) + std::optional o6{std::optional(1)}; // optional(optional&&) + std::optional o7{_co}; // optional(const optional&) + std::optional o8{_o}; // optional(optional&) +} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp new file mode 100644 index 0000000000000..57552743af138 --- /dev/null +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +#include +#include +#include +#include + +template _Val> +constexpr bool test() { + std::remove_reference_t item{_Val}; + std::optional opt{item}; + + { + assert(*opt == item); + assert(&(*opt) == &item); + } + { + assert(*std::as_const(opt) == item); + assert(&(*std::as_const(opt)) == &item); + } + + return true; +} + +template +constexpr T foo(T val) { + return val; +} + +template +constexpr bool fn_ref_test() { + std::optional opt{foo}; + assert(opt.has_value()); + assert((*opt)(_Val) == _Val); + + return true; +} + +template +constexpr bool array_ref_test() { + T arr[5]{}; + std::optional opt{arr}; + + assert(opt.has_value()); + (*opt)[0] = _Val; + assert((*opt)[0] == _Val); + assert(arr[0] == _Val); + + return true; +} + +constexpr bool tests() { + assert((test())); + assert((test())); + assert((fn_ref_test())); + assert((array_ref_test())); + assert((fn_ref_test())); + assert((array_ref_test())); + return true; +} + +int main(int, char**) { + static_assert(tests()); + tests(); +} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp index c0044276ea9ad..1202879036f56 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp @@ -11,9 +11,9 @@ // ~optional(); +#include #include #include -#include #include "test_macros.h" @@ -64,6 +64,24 @@ int main(int, char**) } assert(X::dtor_called == true); } +#if TEST_STD_VER >= 26 + { + typedef X& T; + static_assert(std::is_trivially_destructible_v); + static_assert(std::is_trivially_destructible_v>); + } + X::dtor_called = false; + X x; + { + optional opt{x}; + assert(X::dtor_called == false); + } + assert(X::dtor_called == false); - return 0; + { + 
static_assert(std::is_trivially_destructible_v); + static_assert(std::is_trivially_destructible_v>); + } +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp index 7029b37cbecd7..e23e481f6a05d 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp @@ -69,5 +69,16 @@ int main(int, char**) X::dtor_called = false; } - return 0; +#if TEST_STD_VER >= 26 + { + X x{}; + optional opt(x); + X::dtor_called = false; + opt.reset(); + assert(X::dtor_called == false); + assert(static_cast(opt) == false); + } +#endif + + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp index 49b4d21a28066..6c1bf8aa15a8d 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp @@ -50,7 +50,19 @@ int main(int, char**) optional opt(X{}); assert((*opt).test() == 4); } +#if TEST_STD_VER >= 26 + { + X x{}; + optional opt(x); + ASSERT_SAME_TYPE(decltype(*opt), X&); + ASSERT_NOEXCEPT(*opt); + } + { + X x{}; + optional opt(x); + assert((*opt).test() == 4); + } +#endif static_assert(test() == 7, ""); - return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp index ff86d9534faf6..c15d4e4af74cc 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp @@ -43,6 +43,25 @@ int main(int, char**) constexpr optional opt(X{}); static_assert((*opt).test() == 3, ""); } +#if TEST_STD_VER >= 26 + { + X x{}; + const optional opt{x}; + ASSERT_SAME_TYPE(decltype(*opt), X&); + ASSERT_NOEXCEPT(*opt); + } + { + X x{}; + const optional opt{x}; + ASSERT_SAME_TYPE(decltype(*opt), const X&); + ASSERT_NOEXCEPT(*opt); + } + { + static constexpr X x{}; + constexpr optional opt(x); + static_assert((*opt).test() == 3); + } +#endif { constexpr optional opt(Y{}); assert((*opt).test() == 2); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp index 6998e023022c5..9873a767cfbe6 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp @@ -33,6 +33,13 @@ int main(int, char**) constexpr optional opt(0); static_assert(opt.has_value(), ""); } +#if TEST_STD_VER >= 26 + { + static constexpr int i = 0; + constexpr optional opt{i}; + static_assert(opt.has_value()); + } +#endif - return 0; + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp index 2b5fba546ef42..96d22743ac7fe 
100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp @@ -19,9 +19,9 @@ using std::optional; -struct X -{ - int test() noexcept {return 3;} +struct X { + int test() noexcept { return 3; } + int test() const noexcept { return 3; } }; struct Y @@ -47,6 +47,30 @@ int main(int, char**) optional opt(X{}); assert(opt->test() == 3); } +#if TEST_STD_VER >= 26 + { + X x{}; + std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + X x{}; + std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), const X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + X x{}; + optional opt{x}; + assert(opt->test() == 3); + } + { + X x{}; + optional opt{x}; + assert(opt->test() == 3); + } +#endif { static_assert(test() == 3, ""); } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp index d8ce932bd7810..e9694fd6d9640 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp @@ -54,6 +54,25 @@ int main(int, char**) constexpr optional opt(Z{}); static_assert(opt->test() == 1, ""); } +#if TEST_STD_VER >= 26 + { + X x{}; + const std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + X x{}; + const std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), const X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + static constexpr Z z{}; + constexpr optional opt(z); + static_assert(opt->test() == 1); + } +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp index 781784c6806a4..22b74f5512d53 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp @@ -56,6 +56,14 @@ int main(int, char**) opt.emplace(); assert(opt.value().test() == 4); } +#if TEST_STD_VER >= 26 + { + X x; + optional opt{x}; + ASSERT_NOT_NOEXCEPT(opt.value()); + ASSERT_SAME_TYPE(decltype(opt.value()), X&); + } +#endif #ifndef TEST_HAS_NO_EXCEPTIONS { optional opt; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp index 8c063ae1a799c..66890ff9c9b91 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp @@ -80,6 +80,14 @@ constexpr int test() assert((std::move(opt).value_or({2, 3}) == Z{2, 3})); assert(!opt); } +#if TEST_STD_VER >= 26 + { + int y = 2; + optional opt; + assert(std::move(opt).value_or(y) == 2); + assert(!opt); + } +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp 
b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp index ec42890a3b995..6bd308b405605 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp @@ -79,6 +79,12 @@ int main(int, char**) const optional opt; assert(opt.value_or({Y(3)}) == 4); } - - return 0; +#if TEST_STD_VER >= 26 + { + X y{3}; + const optional opt; + assert(opt.value_or(y) == 3); + } +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp index e3a2fdb8b0020..a82ca615e0c8c 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp @@ -13,9 +13,10 @@ // noexcept(is_nothrow_move_constructible::value && // is_nothrow_swappable::value) +#include +#include #include #include -#include #include "test_macros.h" #include "archetypes.h" @@ -127,6 +128,74 @@ TEST_CONSTEXPR_CXX20 bool check_swap() return true; } +#if TEST_STD_VER >= 26 +template +constexpr bool check_swap_ref() { + { + optional opt1; + optional opt2; + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + opt1.swap(opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + } + + { + T one{1}; + optional opt1(one); + optional opt2; + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == true); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == false); + opt1.swap(opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(std::addressof(*opt2) == std::addressof(one)); + } + + { + T two{2}; + optional opt1; + optional opt2(two); + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(std::addressof(*opt2) == std::addressof(two)); + opt1.swap(opt2); + assert(static_cast(opt1) == true); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == false); + } + + { + T one{1}; + T two{2}; + + optional opt1(one); + optional opt2(two); + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == true); + assert(*opt1 == 1); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == true); + assert(*opt2 == 2); + assert(std::addressof(*opt2) == std::addressof(two)); + opt1.swap(opt2); + assert(static_cast(opt1) == true); + assert(*opt1 == 2); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == true); + assert(*opt2 == 1); + assert(std::addressof(*opt2) == std::addressof(one)); + } + + return true; +} +#endif + int main(int, char**) { check_swap(); @@ -134,6 +203,12 @@ int main(int, char**) #if TEST_STD_VER > 17 static_assert(check_swap()); static_assert(check_swap()); +#endif +#if TEST_STD_VER >= 26 + static_assert(check_swap_ref()); + static_assert(check_swap_ref()); + check_swap_ref(); + check_swap_ref(); #endif { optional opt1; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp 
b/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp index a96c3c648f939..a956ab3a219cf 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp @@ -13,6 +13,8 @@ #include +#include "test_macros.h" + using std::optional; struct X @@ -25,9 +27,13 @@ int main(int, char**) { using std::optional; { - // expected-error-re@optional:* 2 {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} - optional opt1; - optional opt2; +#if TEST_STD_VER >= 26 + // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an rvalue reference type is ill-formed}} +#else + // expected-error-re@optional:* 2 {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} +#endif + optional opt1; + optional opt2; } { // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a non-destructible type is ill-formed}} diff --git a/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp index d097559877267..ecbc6b4548ee6 100644 --- a/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp @@ -36,6 +36,11 @@ int main(int, char**) test, const int>(); test, double>(); test, const double>(); - - return 0; +#if TEST_STD_VER >= 26 + test, int>(); + test, const int>(); + test, double>(); + test, const double>(); +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp index e325a7af558eb..c27645165d20e 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp @@ -13,10 +13,10 @@ // template // constexpr optional> make_optional(T&& v); +#include +#include #include #include -#include -#include #include "test_macros.h" diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp index 23f131d2fc499..5dd1d6f0b3380 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp @@ -15,13 +15,30 @@ // GCC crashes on this file, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120577 // XFAIL: gcc-15 +#include +#include #include #include -#include -#include +#include #include "test_macros.h" +template +constexpr bool test_ref() { + T i{0}; + auto opt = std::make_optional(i); + +#if TEST_STD_VER < 26 + assert((std::is_same_v>)); +#else + assert((std::is_same_v>)); +#endif + + assert(*opt == 0); + + return true; +} + int main(int, char**) { { @@ -43,6 +60,12 @@ int main(int, char**) auto opt = std::make_optional(4u, 'X'); assert(*opt == "XXXX"); } + using namespace std::string_view_literals; + + static_assert(test_ref()); + assert((test_ref())); + static_assert(test_ref()); + assert((test_ref())); - return 0; + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp 
b/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp index 0da3a821e7961..c757120a1c146 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp @@ -12,9 +12,10 @@ // template void swap(optional& x, optional& y) // noexcept(noexcept(x.swap(y))); +#include +#include #include #include -#include #include "test_macros.h" #include "archetypes.h" @@ -109,9 +110,82 @@ void test_swap_sfinae() { } } +#if TEST_STD_VER >= 26 +template +constexpr bool test_swap_ref() { + { + optional opt1; + optional opt2; + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + swap(opt1, opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + } + { + T one{1}; + optional opt1(one); + optional opt2; + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == true); + assert(*opt1 == 1); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == false); + swap(opt1, opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(*opt2 == 1); + assert(std::addressof(*opt2) == std::addressof(one)); + } + { + T two{2}; + optional opt1; + optional opt2(two); + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(*opt2 == 2); + assert(std::addressof(*opt2) == std::addressof(two)); + swap(opt1, opt2); + assert(static_cast(opt1) == true); + assert(*opt1 == 2); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == false); + } + { + T one{1}; + T two{2}; + optional opt1(one); + optional opt2(two); + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == true); + assert(*opt1 == 1); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == true); + assert(*opt2 == 2); + assert(std::addressof(*opt2) == std::addressof(two)); + swap(opt1, opt2); + assert(static_cast(opt1) == true); + assert(*opt1 == 2); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == true); + assert(*opt2 == 1); + assert(std::addressof(*opt2) == std::addressof(one)); + } + return true; +} +#endif + int main(int, char**) { test_swap_sfinae(); +#if TEST_STD_VER >= 26 + static_assert(test_swap_ref()); + static_assert(test_swap_ref()); + test_swap_ref(); + test_swap_ref(); +#endif { optional opt1; optional opt2; diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 82a1785a0c906..0802f865f9406 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -1017,6 +1017,7 @@ def add_version_header(tc): "c++17": 201606, "c++20": 202106, # P2231R1 Missing constexpr in std::optional and std::variant "c++23": 202110, # P0798R8 Monadic operations for std::optional + LWG3621 Remove feature-test macro __cpp_lib_monadic_optional + "c++26": 202506, # P2988R12: std::optional }, "headers": ["optional"], }, From 175e3becbf8468213034679bf749e3c0c4e0bbda Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Tue, 11 Nov 2025 19:00:39 -0800 Subject: [PATCH 23/32] [MLIR][Python] Add region_op wrappers for linalg (#167616) Makes linalg.reduce and linalg.map region_ops so they can be constructed from functions and be called as decorators. 
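For illustration, a condensed sketch of the decorator form, lifted from the testReduceOp case added to mlir/test/python/dialects/linalg/ops.py in this patch; `f32`, `input`, `init`, `single_result`, and `dims` are assumed to be built by that test's surrounding module/func setup, and nothing beyond the wrappers added here is used:

```python
# The decorated function becomes the region body of the linalg.reduce;
# region_op(ReduceOp, terminator=YieldOp) wraps the returned value in the
# linalg.yield terminator.
@linalg.reduce(
    result=[single_result],
    inputs=[input],
    inits=[init],
    dimensions=dims,
)
def reduced(element: f32, acc: f32):
    return arith.mulf(acc, element)

# `reduced` can then be used wherever the reduction result is needed,
# e.g. tensor.extract(reduced, []) as in the test below.
```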
--- mlir/python/mlir/dialects/linalg/__init__.py | 4 ++ mlir/test/python/dialects/linalg/ops.py | 76 +++++++++++++++++++- 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/mlir/python/mlir/dialects/linalg/__init__.py b/mlir/python/mlir/dialects/linalg/__init__.py index d387c12deeed9..c92bda74c12bf 100644 --- a/mlir/python/mlir/dialects/linalg/__init__.py +++ b/mlir/python/mlir/dialects/linalg/__init__.py @@ -352,3 +352,7 @@ def unpack( ip=ip, ) ) + + +reduce = region_op(ReduceOp, terminator=YieldOp) +map = region_op(MapOp, terminator=YieldOp) diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py index 709a1d2424f35..92591cd59fb40 100644 --- a/mlir/test/python/dialects/linalg/ops.py +++ b/mlir/test/python/dialects/linalg/ops.py @@ -1,7 +1,8 @@ # RUN: %PYTHON %s | FileCheck %s -from mlir.dialects import arith, func, linalg, tensor, memref +from mlir.dialects import arith, func, linalg, tensor, memref, builtin from mlir.dialects.linalg.opdsl.lang import * +from mlir.extras import types as T from mlir.ir import * @@ -857,3 +858,76 @@ def elementwise_op( ) print(module) + + +@run +def testReduceOp(): + with Context(), Location.unknown(): + f32 = T.f32() + tensor_type = T.tensor(10, f32) + + @builtin.module + def module(): + @func.func(tensor_type) + def reduce_op(input): + c1 = arith.constant(f32, 1.0) + single_result = ir.RankedTensorType.get((), f32) + dims = ir.DenseI64ArrayAttr.get([0]) + init = tensor.splat(single_result, c1, []) + + @linalg.reduce( + result=[single_result], + inputs=[input], + inits=[init], + dimensions=dims, + ) + def reduced(element: f32, acc: f32): + return arith.mulf(acc, element) + + return tensor.extract(reduced, []) + + print(module) + + +# CHECK-LABEL: func.func @reduce_op( +# CHECK-SAME: %[[ARG0:.*]]: tensor<10xf32>) -> f32 { +# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1.000000e+00 : f32 +# CHECK: %[[SPLAT_0:.*]] = tensor.splat %[[CONSTANT_0]] : tensor +# CHECK: %[[REDUCE_0:.*]] = linalg.reduce { arith.mulf } ins(%[[ARG0]] : tensor<10xf32>) outs(%[[SPLAT_0]] : tensor) dimensions = [0] +# CHECK: %[[EXTRACT_0:.*]] = tensor.extract %[[REDUCE_0]][] : tensor +# CHECK: return %[[EXTRACT_0]] : f32 +# CHECK: } + + +@run +def testMapOp(): + with Context(), Location.unknown(): + f32 = T.f32() + tensor_type = T.tensor(10, f32) + + @builtin.module + def module(): + @func.func(tensor_type) + def map_op(input): + empty = tensor.empty(tensor_type.shape, f32) + + @linalg.map( + result=[tensor_type], + inputs=[input, input], + init=empty, + ) + def add(element: f32, acc: f32, init: f32): + return arith.addf(element, acc) + + return add + + module.verify() + print(module) + + +# CHECK-LABEL: func.func @map_op( +# CHECK-SAME: %[[ARG0:.*]]: tensor<10xf32>) -> tensor<10xf32> { +# CHECK: %[[EMPTY_0:.*]] = tensor.empty() : tensor<10xf32> +# CHECK: %[[MAP_0:.*]] = linalg.map { arith.addf } ins(%[[ARG0]], %[[ARG0]] : tensor<10xf32>, tensor<10xf32>) outs(%[[EMPTY_0]] : tensor<10xf32>) +# CHECK: return %[[MAP_0]] : tensor<10xf32> +# CHECK: } From 905ee4424d62f80a45f26ac03e29adf3bb7a6c85 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 12 Nov 2025 11:13:38 +0800 Subject: [PATCH 24/32] [NFC] [C++20] [Modules] Test that we can avoid adding more specializations in reduced BMI --- ...-specialization-update-in-reduced-bmi.cppm | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm diff --git 
a/clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm b/clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm new file mode 100644 index 0000000000000..7844344a15427 --- /dev/null +++ b/clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm @@ -0,0 +1,28 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/base.cppm -emit-module-interface -o %t/base.pcm +// RUN: %clang_cc1 -std=c++20 %t/update.cppm -fmodule-file=base=%t/base.pcm -emit-module-interface -o %t/update.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/update.pcm | FileCheck %t/update.cppm --check-prefix=FULL +// +// RUN: %clang_cc1 -std=c++20 %t/base.cppm -emit-reduced-module-interface -o %t/base.pcm +// RUN: %clang_cc1 -std=c++20 %t/update.cppm -fmodule-file=base=%t/base.pcm -emit-reduced-module-interface -o %t/update.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/update.pcm | FileCheck %t/update.cppm + +//--- base.cppm +export module base; + +export template +struct base { + T value; +}; + +//--- update.cppm +export module update; +import base; +export int update() { + return base().value; +} + +// FULL: TEMPLATE_SPECIALIZATION +// CHECK-NOT: TEMPLATE_SPECIALIZATION From 1f38d49ebe96417e368a567efa4d650b8a9ac30f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 11 Nov 2025 19:49:53 -0800 Subject: [PATCH 25/32] ValueMapper: Delete unused initializers of replaced appending globals. A full LTO link time performance and memory regression was introduced by #137081 in cases where the modules contain large quantities of llvm.used globals. This was unnoticed because it was not expected that this would be a typical case, but this is exactly what coverage collection does, and when this feature is enabled together with full LTO we end up with quadratic memory consumption (from the unused constants) and quadratic complexity in the function Verifier::visitGlobalValue (which visits all the unused constants in the use list of each global value). This is a targeted fix that avoids reintroducing the quadratic complexity from before #137081, by having ValueMapper delete the old initializer of an appending global if it is unused, instead of visiting every global in the context after every link. The repro-cfi-64 reproducer from #167037 before and after this change: ``` Elapsed time Max RSS (KB) Before 12:05.11 52537184 After 3:27.68 7520696 ``` Fixes #167037. 
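For reference, the heart of the change in Mapper::mapAppendingVariable, condensed from the ValueMapper.cpp hunk below (the SmallVector/cast template arguments, which this rendering of the diff drops, are filled in to match the surrounding file):

```cpp
// Copy the elements of the old appending global's initializer, then detach
// and destroy the now-unused initializer so it no longer sits on the use
// lists of the globals it references.
Constant *InitPrefix =
    (OldGV && !OldGV->isDeclaration()) ? OldGV->getInitializer() : nullptr;

SmallVector<Constant *, 16> Elements;
if (InitPrefix) {
  unsigned NumElements =
      cast<ArrayType>(InitPrefix->getType())->getNumElements();
  for (unsigned I = 0; I != NumElements; ++I)
    Elements.push_back(InitPrefix->getAggregateElement(I));
  OldGV->setInitializer(nullptr);
  if (InitPrefix->hasUseList() && InitPrefix->use_empty())
    InitPrefix->destroyConstant();
}
```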
Reviewers: nikic, teresajohnson Reviewed By: teresajohnson Pull Request: https://github.com/llvm/llvm-project/pull/167629 --- .../llvm/Transforms/Utils/ValueMapper.h | 2 +- llvm/lib/Linker/IRMover.cpp | 5 +--- llvm/lib/Transforms/Utils/ValueMapper.cpp | 24 ++++++++++++------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h index 17b5d4b891230..28c4ae840b29f 100644 --- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h +++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h @@ -204,7 +204,7 @@ class ValueMapper { LLVM_ABI void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, unsigned MappingContextID = 0); LLVM_ABI void scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, + GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MappingContextID = 0); diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index f78d9b016d8c9..f215f39f41bfb 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -882,10 +882,7 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV, NG->copyAttributesFrom(SrcGV); forceRenaming(NG, SrcGV->getName()); - Mapper.scheduleMapAppendingVariable( - *NG, - (DstGV && !DstGV->isDeclaration()) ? DstGV->getInitializer() : nullptr, - IsOldStructor, SrcElements); + Mapper.scheduleMapAppendingVariable(*NG, DstGV, IsOldStructor, SrcElements); // Replace any uses of the two global variables with uses of the new // global. diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp index 8d8a60b6918fe..9021d8b289baf 100644 --- a/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -77,7 +77,7 @@ struct WorklistEntry { }; struct AppendingGVTy { GlobalVariable *GV; - Constant *InitPrefix; + GlobalVariable *OldGV; }; struct AliasOrIFuncTy { GlobalValue *GV; @@ -162,7 +162,7 @@ class Mapper { void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, unsigned MCID); - void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + void scheduleMapAppendingVariable(GlobalVariable &GV, GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID); @@ -173,7 +173,7 @@ class Mapper { void flush(); private: - void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + void mapAppendingVariable(GlobalVariable &GV, GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers); @@ -944,7 +944,7 @@ void Mapper::flush() { drop_begin(AppendingInits, PrefixSize)); AppendingInits.resize(PrefixSize); mapAppendingVariable(*E.Data.AppendingGV.GV, - E.Data.AppendingGV.InitPrefix, + E.Data.AppendingGV.OldGV, E.AppendingGVIsOldCtorDtor, ArrayRef(NewInits)); break; } @@ -1094,15 +1094,21 @@ void Mapper::remapFunction(Function &F) { } } -void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, +void Mapper::mapAppendingVariable(GlobalVariable &GV, GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers) { + Constant *InitPrefix = + (OldGV && !OldGV->isDeclaration()) ? 
OldGV->getInitializer() : nullptr; + SmallVector Elements; if (InitPrefix) { unsigned NumElements = cast(InitPrefix->getType())->getNumElements(); for (unsigned I = 0; I != NumElements; ++I) Elements.push_back(InitPrefix->getAggregateElement(I)); + OldGV->setInitializer(nullptr); + if (InitPrefix->hasUseList() && InitPrefix->use_empty()) + InitPrefix->destroyConstant(); } PointerType *VoidPtrTy; @@ -1148,7 +1154,7 @@ void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, } void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, + GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID) { @@ -1159,7 +1165,7 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, WE.Kind = WorklistEntry::MapAppendingVar; WE.MCID = MCID; WE.Data.AppendingGV.GV = &GV; - WE.Data.AppendingGV.InitPrefix = InitPrefix; + WE.Data.AppendingGV.OldGV = OldGV; WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor; WE.AppendingGVNumNewMembers = NewMembers.size(); Worklist.push_back(WE); @@ -1282,12 +1288,12 @@ void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV, } void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, + GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID) { getAsMapper(pImpl)->scheduleMapAppendingVariable( - GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID); + GV, OldGV, IsOldCtorDtor, NewMembers, MCID); } void ValueMapper::scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee, From c5aace42360184a75323d8fab3a26f0a1d8ba5e7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 19:50:33 -0800 Subject: [PATCH 26/32] DAG: Move expandMultipleResultFPLibCall to TargetLowering (NFC) (#166988) This kind of helper is higher level and not general enough to go directly in SelectionDAG. Most similar utilities are in TargetLowering. --- llvm/include/llvm/CodeGen/SelectionDAG.h | 10 -- llvm/include/llvm/CodeGen/TargetLowering.h | 10 ++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 2 +- .../SelectionDAG/LegalizeVectorOps.cpp | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 161 ------------------ .../CodeGen/SelectionDAG/TargetLowering.cpp | 161 ++++++++++++++++++ 7 files changed, 176 insertions(+), 176 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 5b331e4444915..b024e8a68bd6e 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1718,16 +1718,6 @@ class SelectionDAG { /// the target's desired shift amount type. LLVM_ABI SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); - /// Expands a node with multiple results to an FP or vector libcall. The - /// libcall is expected to take all the operands of the \p Node followed by - /// output pointers for each of the results. \p CallRetResNo can be optionally - /// set to indicate that one of the results comes from the libcall's return - /// value. - LLVM_ABI bool - expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node, - SmallVectorImpl &Results, - std::optional CallRetResNo = {}); - /// Expand the specified \c ISD::VAARG node as the Legalize pass would. 
LLVM_ABI SDValue expandVAArg(SDNode *Node); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 4d5d1fc7dfadc..cec7d09f494d6 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5757,6 +5757,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase { /// consisting of zext/sext, extract_subvector, mul and add operations. SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const; + /// Expands a node with multiple results to an FP or vector libcall. The + /// libcall is expected to take all the operands of the \p Node followed by + /// output pointers for each of the results. \p CallRetResNo can be optionally + /// set to indicate that one of the results comes from the libcall's return + /// value. + bool expandMultipleResultFPLibCall( + SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, + SmallVectorImpl &Results, + std::optional CallRetResNo = {}) const; + /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC /// on the current target. A VP_SETCC will additionally be given a Mask /// and/or EVL not equal to SDValue(). diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3ed84af6a8717..99d14a60c6ed1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4842,7 +4842,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results); if (!Expanded) { DAG.getContext()->emitError(Twine("no libcall available for ") + Node->getOperationName(&DAG)); @@ -4940,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 58983cb57d7f6..383a025a4d916 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1726,7 +1726,7 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); + TLI.expandMultipleResultFPLibCall(DAG, LC, N, Results, CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index c55e55df373e9..7d979caa8bf82 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1275,7 +1275,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { ? 
RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results)) + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results)) return; // TODO: Try to see if there's a narrower call available to use before @@ -1286,7 +1286,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results, + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f05266967fb68..363c71d84694f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2467,167 +2467,6 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { return getZExtOrTrunc(Op, SDLoc(Op), ShTy); } -/// Given a store node \p StoreNode, return true if it is safe to fold that node -/// into \p FPNode, which expands to a library call with output pointers. -static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, - SDNode *FPNode) { - SmallVector Worklist; - SmallVector DeferredNodes; - SmallPtrSet Visited; - - // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). - for (SDValue Op : StoreNode->ops()) - if (Op.getNode() != FPNode) - Worklist.push_back(Op.getNode()); - - unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); - while (!Worklist.empty()) { - const SDNode *Node = Worklist.pop_back_val(); - auto [_, Inserted] = Visited.insert(Node); - if (!Inserted) - continue; - - if (MaxSteps > 0 && Visited.size() >= MaxSteps) - return false; - - // Reached the FPNode (would result in a cycle). - // OR Reached CALLSEQ_START (would result in nested call sequences). - if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) - return false; - - if (Node->getOpcode() == ISD::CALLSEQ_END) { - // Defer looking into call sequences (so we can check we're outside one). - // We still need to look through these for the predecessor check. - DeferredNodes.push_back(Node); - continue; - } - - for (SDValue Op : Node->ops()) - Worklist.push_back(Op.getNode()); - } - - // True if we're outside a call sequence and don't have the FPNode as a - // predecessor. No cycles or nested call sequences possible. - return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, - MaxSteps); -} - -bool SelectionDAG::expandMultipleResultFPLibCall( - RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl &Results, - std::optional CallRetResNo) { - if (LC == RTLIB::UNKNOWN_LIBCALL) - return false; - - RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LC); - if (LibcallImpl == RTLIB::Unsupported) - return false; - - LLVMContext &Ctx = *getContext(); - EVT VT = Node->getValueType(0); - unsigned NumResults = Node->getNumValues(); - - // Find users of the node that store the results (and share input chains). The - // destination pointers can be used instead of creating stack allocations. - SDValue StoresInChain; - SmallVector ResultStores(NumResults); - for (SDNode *User : Node->users()) { - if (!ISD::isNormalStore(User)) - continue; - auto *ST = cast(User); - SDValue StoreValue = ST->getValue(); - unsigned ResNo = StoreValue.getResNo(); - // Ensure the store corresponds to an output pointer. 
- if (CallRetResNo == ResNo) - continue; - // Ensure the store to the default address space and not atomic or volatile. - if (!ST->isSimple() || ST->getAddressSpace() != 0) - continue; - // Ensure all store chains are the same (so they don't alias). - if (StoresInChain && ST->getChain() != StoresInChain) - continue; - // Ensure the store is properly aligned. - Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); - if (ST->getAlign() < - getDataLayout().getABITypeAlign(StoreType->getScalarType())) - continue; - // Avoid: - // 1. Creating cyclic dependencies. - // 2. Expanding the node to a call within a call sequence. - if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) - continue; - ResultStores[ResNo] = ST; - StoresInChain = ST->getChain(); - } - - TargetLowering::ArgListTy Args; - - // Pass the arguments. - for (const SDValue &Op : Node->op_values()) { - EVT ArgVT = Op.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(Ctx); - Args.emplace_back(Op, ArgTy); - } - - // Pass the output pointers. - SmallVector ResultPtrs(NumResults); - Type *PointerTy = PointerType::getUnqual(Ctx); - for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { - if (ResNo == CallRetResNo) - continue; - EVT ResVT = Node->getValueType(ResNo); - SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT); - ResultPtrs[ResNo] = ResultPtr; - Args.emplace_back(ResultPtr, PointerTy); - } - - SDLoc DL(Node); - - if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { - // Pass the vector mask (if required). - EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); - SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); - Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); - } - - Type *RetType = CallRetResNo.has_value() - ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) - : Type::getVoidTy(Ctx); - SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); - SDValue Callee = - getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), - TLI->getPointerTy(getDataLayout())); - TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( - TLI->getLibcallImplCallingConv(LibcallImpl), RetType, Callee, - std::move(Args)); - - auto [Call, CallChain] = TLI->LowerCallTo(CLI); - - for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { - if (ResNo == CallRetResNo) { - Results.push_back(Call); - continue; - } - MachinePointerInfo PtrInfo; - SDValue LoadResult = - getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); - SDValue OutChain = LoadResult.getValue(1); - - if (StoreSDNode *ST = ResultStores[ResNo]) { - // Replace store with the library call. 
- ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); - PtrInfo = ST->getPointerInfo(); - } else { - PtrInfo = MachinePointerInfo::getFixedStack( - getMachineFunction(), cast(ResultPtr)->getIndex()); - } - - Results.push_back(LoadResult); - } - - return true; -} - SDValue SelectionDAG::expandVAArg(SDNode *Node) { SDLoc dl(Node); const TargetLowering &TLI = getTargetLoweringInfo(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b51d6649af2ec..bb64f4ee70280 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12126,6 +12126,167 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N, return Subvectors[0]; } +/// Given a store node \p StoreNode, return true if it is safe to fold that node +/// into \p FPNode, which expands to a library call with output pointers. +static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, + SDNode *FPNode) { + SmallVector Worklist; + SmallVector DeferredNodes; + SmallPtrSet Visited; + + // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). + for (SDValue Op : StoreNode->ops()) + if (Op.getNode() != FPNode) + Worklist.push_back(Op.getNode()); + + unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); + while (!Worklist.empty()) { + const SDNode *Node = Worklist.pop_back_val(); + auto [_, Inserted] = Visited.insert(Node); + if (!Inserted) + continue; + + if (MaxSteps > 0 && Visited.size() >= MaxSteps) + return false; + + // Reached the FPNode (would result in a cycle). + // OR Reached CALLSEQ_START (would result in nested call sequences). + if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) + return false; + + if (Node->getOpcode() == ISD::CALLSEQ_END) { + // Defer looking into call sequences (so we can check we're outside one). + // We still need to look through these for the predecessor check. + DeferredNodes.push_back(Node); + continue; + } + + for (SDValue Op : Node->ops()) + Worklist.push_back(Op.getNode()); + } + + // True if we're outside a call sequence and don't have the FPNode as a + // predecessor. No cycles or nested call sequences possible. + return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, + MaxSteps); +} + +bool TargetLowering::expandMultipleResultFPLibCall( + SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, + SmallVectorImpl &Results, + std::optional CallRetResNo) const { + if (LC == RTLIB::UNKNOWN_LIBCALL) + return false; + + RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC); + if (LibcallImpl == RTLIB::Unsupported) + return false; + + LLVMContext &Ctx = *DAG.getContext(); + EVT VT = Node->getValueType(0); + unsigned NumResults = Node->getNumValues(); + + // Find users of the node that store the results (and share input chains). The + // destination pointers can be used instead of creating stack allocations. + SDValue StoresInChain; + SmallVector ResultStores(NumResults); + for (SDNode *User : Node->users()) { + if (!ISD::isNormalStore(User)) + continue; + auto *ST = cast(User); + SDValue StoreValue = ST->getValue(); + unsigned ResNo = StoreValue.getResNo(); + // Ensure the store corresponds to an output pointer. + if (CallRetResNo == ResNo) + continue; + // Ensure the store to the default address space and not atomic or volatile. + if (!ST->isSimple() || ST->getAddressSpace() != 0) + continue; + // Ensure all store chains are the same (so they don't alias). 
+ if (StoresInChain && ST->getChain() != StoresInChain) + continue; + // Ensure the store is properly aligned. + Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); + if (ST->getAlign() < + DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType())) + continue; + // Avoid: + // 1. Creating cyclic dependencies. + // 2. Expanding the node to a call within a call sequence. + if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) + continue; + ResultStores[ResNo] = ST; + StoresInChain = ST->getChain(); + } + + ArgListTy Args; + + // Pass the arguments. + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(Ctx); + Args.emplace_back(Op, ArgTy); + } + + // Pass the output pointers. + SmallVector ResultPtrs(NumResults); + Type *PointerTy = PointerType::getUnqual(Ctx); + for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { + if (ResNo == CallRetResNo) + continue; + EVT ResVT = Node->getValueType(ResNo); + SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT); + ResultPtrs[ResNo] = ResultPtr; + Args.emplace_back(ResultPtr, PointerTy); + } + + SDLoc DL(Node); + + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + // Pass the vector mask (if required). + EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT); + SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT); + Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); + } + + Type *RetType = CallRetResNo.has_value() + ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) + : Type::getVoidTy(Ctx); + SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode(); + SDValue Callee = DAG.getExternalSymbol(getLibcallImplName(LibcallImpl).data(), + getPointerTy(DAG.getDataLayout())); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( + getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args)); + + auto [Call, CallChain] = LowerCallTo(CLI); + + for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { + if (ResNo == CallRetResNo) { + Results.push_back(Call); + continue; + } + MachinePointerInfo PtrInfo; + SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain, + ResultPtr, PtrInfo); + SDValue OutChain = LoadResult.getValue(1); + + if (StoreSDNode *ST = ResultStores[ResNo]) { + // Replace store with the library call. + DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); + PtrInfo = ST->getPointerInfo(); + } else { + PtrInfo = MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), + cast(ResultPtr)->getIndex()); + } + + Results.push_back(LoadResult); + } + + return true; +} + bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, From 54659793328a0a7f0f1efa9d4d7fc43999b34ae2 Mon Sep 17 00:00:00 2001 From: Gleb Popov <6yearold@gmail.com> Date: Wed, 12 Nov 2025 07:45:42 +0300 Subject: [PATCH 27/32] libunwind: Implement the unw_strerror function for better nongnu libunwind compatibility (#160887) As it was explained to me in https://discourse.llvm.org/t/libunwinds-raison-detre/88283/2 the LLVM version of libunwind is mostly compatible with nongnu one. This change improves the compatibility a bit further. 
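For reference, a minimal caller-side sketch of the nongnu-style error-reporting idiom this enables (my own illustration, not part of this patch; the `show_top_frame` helper and its output format are made up, and the `UNW_LOCAL_ONLY` define just follows the nongnu convention):

```c
#define UNW_LOCAL_ONLY /* nongnu convention for local-only unwinding */
#include <libunwind.h>
#include <stdio.h>

/* Inspect only the current frame: capture the context, initialize a local
   cursor, read the instruction pointer, and report any failure through
   unw_strerror(). */
static void show_top_frame(void) {
  unw_context_t uc;
  unw_cursor_t cursor;
  unw_word_t ip;
  int err;

  unw_getcontext(&uc);
  if ((err = unw_init_local(&cursor, &uc)) != UNW_ESUCCESS) {
    fprintf(stderr, "unw_init_local: %s\n", unw_strerror(err));
    return;
  }
  if ((err = unw_get_reg(&cursor, UNW_REG_IP, &ip)) != UNW_ESUCCESS) {
    fprintf(stderr, "unw_get_reg: %s\n", unw_strerror(err));
    return;
  }
  printf("ip = %#lx\n", (unsigned long)ip);
}
```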
--- libunwind/include/libunwind.h | 1 + libunwind/src/libunwind.cpp | 35 +++++++++++++++++++++++++++++++++++ libunwind/src/libunwind_ext.h | 1 + 3 files changed, 37 insertions(+) diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h index 18684ce311f95..56ca7110274a3 100644 --- a/libunwind/include/libunwind.h +++ b/libunwind/include/libunwind.h @@ -234,6 +234,7 @@ extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL; extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL; //extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*); +extern const char *unw_strerror(int) LIBUNWIND_AVAIL; extern unw_addr_space_t unw_local_addr_space; diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 3a94b6cf0cc5c..b3036396c379d 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -409,6 +409,41 @@ void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start) { } #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +/// Maps the UNW_* error code to a textual representation +_LIBUNWIND_HIDDEN const char *__unw_strerror(int error_code) { + switch (error_code) { + case UNW_ESUCCESS: + return "no error"; + case UNW_EUNSPEC: + return "unspecified (general) error"; + case UNW_ENOMEM: + return "out of memory"; + case UNW_EBADREG: + return "bad register number"; + case UNW_EREADONLYREG: + return "attempt to write read-only register"; + case UNW_ESTOPUNWIND: + return "stop unwinding"; + case UNW_EINVALIDIP: + return "invalid IP"; + case UNW_EBADFRAME: + return "bad frame"; + case UNW_EINVAL: + return "unsupported operation or bad value"; + case UNW_EBADVERSION: + return "unwind info has unsupported version"; + case UNW_ENOINFO: + return "no unwind info found"; +#if defined(_LIBUNWIND_TARGET_AARCH64) && !defined(_LIBUNWIND_IS_NATIVE_ONLY) + case UNW_ECROSSRASIGNING: + return "cross unwind with return address signing"; +#endif + } + return "invalid error code"; +} +_LIBUNWIND_WEAK_ALIAS(__unw_strerror, unw_strerror) + #endif // !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) #ifdef __APPLE__ diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h index 900e8101f81f1..f5da90d7bd3b7 100644 --- a/libunwind/src/libunwind_ext.h +++ b/libunwind/src/libunwind_ext.h @@ -46,6 +46,7 @@ extern int __unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *); extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t); extern int __unw_is_signal_frame(unw_cursor_t *); extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); +extern const char *__unw_strerror(int); #if defined(_AIX) extern uintptr_t __unw_get_data_rel_base(unw_cursor_t *); From ce32b73a6285bd5db22e64f52c132d1fc90aed39 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 12 Nov 2025 16:10:34 +1100 Subject: [PATCH 28/32] Orc rt session wrap unwrap (#167635) --- orc-rt/include/orc-rt/Session.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/orc-rt/include/orc-rt/Session.h b/orc-rt/include/orc-rt/Session.h index c198d374bc849..fe4f07157385f 100644 --- a/orc-rt/include/orc-rt/Session.h +++ b/orc-rt/include/orc-rt/Session.h @@ -17,6 +17,8 @@ #include "orc-rt/ResourceManager.h" #include "orc-rt/move_only_function.h" +#include "orc-rt-c/CoreTypes.h" + #include namespace orc_rt { @@ -69,6 +71,14 @@ class Session { std::vector> ResourceMgrs; }; +inline orc_rt_SessionRef wrap(Session *S) noexcept { + return 
reinterpret_cast(S); +} + +inline Session *unwrap(orc_rt_SessionRef S) noexcept { + return reinterpret_cast(S); +} + } // namespace orc_rt #endif // ORC_RT_SESSION_H From a664cadaf4259da1cd1a3c6eb3cd38520d03ffa0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 21:18:42 -0800 Subject: [PATCH 29/32] Wasm: Fix hardcoding _Unwind_CallPersonality function name (#167612) --- llvm/lib/CodeGen/WasmEHPrepare.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index 1ea3e6bcb15ce..2f54578da5113 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -85,6 +85,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Module.h" +#include "llvm/IR/RuntimeLibcalls.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -273,8 +274,13 @@ bool WasmEHPrepareImpl::prepareEHPads(Function &F) { // instruction selection. CatchF = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::wasm_catch); + // FIXME: Verify this is really supported for current module. + StringRef UnwindCallPersonalityName = + RTLIB::RuntimeLibcallsInfo::getLibcallImplName( + RTLIB::impl__Unwind_CallPersonality); + // _Unwind_CallPersonality() wrapper function, which calls the personality - CallPersonalityF = M.getOrInsertFunction("_Unwind_CallPersonality", + CallPersonalityF = M.getOrInsertFunction(UnwindCallPersonalityName, IRB.getInt32Ty(), IRB.getPtrTy()); if (Function *F = dyn_cast(CallPersonalityF.getCallee())) F->setDoesNotThrow(); From 7d9b7e8c7b251d54b5d9d3f9fb8d5dde3483389c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 21:26:40 -0800 Subject: [PATCH 30/32] PPC: Mark xfailed sincospi test as unsupported with EXPENSIVE_CHECKS (#167639) --- llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll index 4fbb6a07aa37d..c332f441e8b00 100644 --- a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll +++ b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll @@ -1,4 +1,5 @@ ; XFAIL: * +; UNSUPPORTED: expensive_checks ; FIXME: asserts ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-gnu-linux -filetype=null -enable-legalize-types-checking=0 \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names %s From a7ceeffb30b1b785ab5d5f86ff12e79cd7cb9df6 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 12 Nov 2025 16:36:52 +1100 Subject: [PATCH 31/32] [orc-rt] Make Session explicitly immovable. (#167640) NFCI -- the deleted copy constructor already made this immovable. The explicit operations just make clear that this was intentional. --- orc-rt/include/orc-rt/Session.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/orc-rt/include/orc-rt/Session.h b/orc-rt/include/orc-rt/Session.h index fe4f07157385f..fbace053bd72f 100644 --- a/orc-rt/include/orc-rt/Session.h +++ b/orc-rt/include/orc-rt/Session.h @@ -42,6 +42,8 @@ class Session { // Sessions are not copyable or moveable. 
Session(const Session &) = delete; Session &operator=(const Session &) = delete; + Session(Session &&) = delete; + Session &operator=(Session &&) = delete; ~Session(); From ae2b303391c7d626475a1b25db91c1a8ffefbc5d Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 12 Nov 2025 13:37:36 +0800 Subject: [PATCH 32/32] [C++20] [Modules] Don't import initializer/pending implicit instantiations from other named module (#167468) Close https://github.com/llvm/llvm-project/issues/166068 The cause of the problem is that we would import initializers and pending implicit instantiations from other named modules. This wastes compilation time, but it went unnoticed for a long time: the weak symbols can coexist and the redundant strong symbols are removed by other mechanisms, so the output still looked correct even though compilation time was being wasted. --- clang/lib/Serialization/ASTReader.cpp | 23 ++++++++++------ clang/lib/Serialization/ASTWriter.cpp | 22 +++++++++------- clang/test/Modules/pr166068.cppm | 38 +++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 clang/test/Modules/pr166068.cppm diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a04041c10b4ba..634bf991b2aee 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4087,10 +4087,14 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, std::errc::illegal_byte_sequence, "Invalid PENDING_IMPLICIT_INSTANTIATIONS block"); - for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) { - PendingInstantiations.push_back( - {ReadDeclID(F, Record, I), - ReadSourceLocation(F, Record, I).getRawEncoding()}); + // For standard C++20 modules, we only read the instantiations + // if this is the main file. + if (!F.StandardCXXModule || F.Kind == MK_MainFile) { + for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) { + PendingInstantiations.push_back( + {ReadDeclID(F, Record, I), + ReadSourceLocation(F, Record, I).getRawEncoding()}); + } } break; @@ -6438,10 +6442,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, case SUBMODULE_INITIALIZERS: { if (!ContextObj) break; - SmallVector Inits; - for (unsigned I = 0; I < Record.size(); /*in loop*/) - Inits.push_back(ReadDeclID(F, Record, I)); - ContextObj->addLazyModuleInitializers(CurrentModule, Inits); + // Standard C++ modules have their own way to initialize variables. + if (!F.StandardCXXModule || F.Kind == MK_MainFile) { + SmallVector Inits; + for (unsigned I = 0; I < Record.size(); /*in loop*/) + Inits.push_back(ReadDeclID(F, Record, I)); + ContextObj->addLazyModuleInitializers(CurrentModule, Inits); + } break; } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 821e7df1bce53..e4618d60a8acb 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3247,7 +3247,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { // Emit the reachable initializers. // The initializer may only be unreachable in reduced BMI. - if (Context) { + if (Context && !GeneratingReducedBMI) { RecordData Inits; for (Decl *D : Context->getModuleInitializers(Mod)) if (wasDeclEmitted(D)) @@ -5827,17 +5827,19 @@ void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) { Stream.EmitRecord(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES, UnusedLocalTypedefNameCandidates); - // Write the record containing pending implicit instantiations.
- RecordData PendingInstantiations; - for (const auto &I : SemaRef.PendingInstantiations) { - if (!wasDeclEmitted(I.first)) - continue; + if (!GeneratingReducedBMI) { + // Write the record containing pending implicit instantiations. + RecordData PendingInstantiations; + for (const auto &I : SemaRef.PendingInstantiations) { + if (!wasDeclEmitted(I.first)) + continue; - AddDeclRef(I.first, PendingInstantiations); - AddSourceLocation(I.second, PendingInstantiations); + AddDeclRef(I.first, PendingInstantiations); + AddSourceLocation(I.second, PendingInstantiations); + } + if (!PendingInstantiations.empty()) + Stream.EmitRecord(PENDING_IMPLICIT_INSTANTIATIONS, PendingInstantiations); } - if (!PendingInstantiations.empty()) - Stream.EmitRecord(PENDING_IMPLICIT_INSTANTIATIONS, PendingInstantiations); // Write the record containing declaration references of Sema. RecordData SemaDeclRefs; diff --git a/clang/test/Modules/pr166068.cppm b/clang/test/Modules/pr166068.cppm new file mode 100644 index 0000000000000..b6944b591d264 --- /dev/null +++ b/clang/test/Modules/pr166068.cppm @@ -0,0 +1,38 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/flyweight.cppm -emit-reduced-module-interface -o %t/flyweight.pcm +// RUN: %clang_cc1 -std=c++20 %t/account.cppm -emit-reduced-module-interface -o %t/account.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/core.cppm -emit-reduced-module-interface -o %t/core.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/core.cppm -fprebuilt-module-path=%t -emit-llvm -disable-llvm-passes -o - | FileCheck %t/core.cppm + +//--- flyweight.cppm +module; +template struct flyweight_core { + static bool init() { (void)__builtin_operator_new(2); return true; } + static bool static_initializer; +}; +template bool flyweight_core::static_initializer = init(); +export module flyweight; +export template void flyweight() { + (void)flyweight_core::static_initializer; +} + +//--- account.cppm +export module account; +import flyweight; +export void account() { + (void)::flyweight; +} + +//--- core.cppm +export module core; +import account; + +extern "C" void core() {} + +// Fine enough to check it won't crash. +// CHECK-NOT: init +// CHECK-NOT: static_initializer +// CHECK: define {{.*}}@core(