From 441e5115224a00d4772ed899f59b3aab166f80de Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 15:30:41 -0800 Subject: [PATCH 01/32] AMDGPU: Update register class numbers in test (#167601) --- llvm/test/CodeGen/AMDGPU/limit-coalesce.mir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir index e5b68b48158da..a245c475638f2 100644 --- a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir +++ b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -17,7 +17,7 @@ body: | ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub1:sgpr_64 = COPY $sgpr17 ; CHECK-NEXT: [[COPY:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr16 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: INLINEASM &"; def $0", 0 /* attdialect */, 3407882 /* regdef:VReg_64 */, def %4 + ; CHECK-NEXT: INLINEASM &"; def $0", 0 /* attdialect */, 2818058 /* regdef:VReg_64 */, def %4 ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:vreg_128 = COPY %4.sub1 ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -26,7 +26,7 @@ body: | undef %2.sub0:sgpr_64 = COPY killed %1 %2.sub1:sgpr_64 = COPY killed %0 %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - INLINEASM &"; def $0", 0 /* attdialect */, 3407882 /* regdef:VReg_64 */, def %4:vreg_64 + INLINEASM &"; def $0", 0 /* attdialect */, 2818058 /* regdef:VReg_64 */, def %4:vreg_64 undef %5.sub0:vreg_128 = COPY killed %4.sub1 GLOBAL_STORE_DWORDX4_SADDR killed %3, killed %5, killed %2, 0, 0, implicit $exec :: (store (s128), addrspace 1) SI_RETURN From e3a9ac5e24d08cef3160fe3e242a4afe1b6d95a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 15:31:52 -0800 Subject: [PATCH 02/32] AMDGPU: Remove wrapper around TRI::getRegClass (#159885) This shadows the member in the base class, but differs slightly in behavior. The base method doesn't check for the invalid case. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 7 ++++--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 +++-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 11 ----------- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 2 -- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 293005c759e53..2c00e23d113cb 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1284,10 +1284,11 @@ void SIFoldOperandsImpl::foldOperand( continue; const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1; - const TargetRegisterClass *MovSrcRC = - TRI->getRegClass(TII->getOpRegClassID(MovDesc.operands()[SrcIdx])); - if (MovSrcRC) { + int16_t RegClassID = TII->getOpRegClassID(MovDesc.operands()[SrcIdx]); + if (RegClassID != -1) { + const TargetRegisterClass *MovSrcRC = TRI->getRegClass(RegClassID); + if (UseSubReg) MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e5f0e3e631988..4c4625b8834ee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6032,7 +6032,7 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, return nullptr; const MCOperandInfo &OpInfo = TID.operands()[OpNum]; int16_t RegClass = getOpRegClassID(OpInfo); - return RI.getRegClass(RegClass); + return RegClass < 0 ? 
nullptr : RI.getRegClass(RegClass); } const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, @@ -6050,7 +6050,8 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, return RI.getPhysRegBaseClass(Reg); } - return RI.getRegClass(getOpRegClassID(Desc.operands()[OpNo])); + int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]); + return RegClass < 0 ? nullptr : RI.getRegClass(RegClass); } void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8fba74831811f..ad79bdf3190f0 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -3908,17 +3908,6 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const { : &AMDGPU::VReg_64RegClass; } -// FIXME: This should be deleted -const TargetRegisterClass * -SIRegisterInfo::getRegClass(unsigned RCID) const { - switch ((int)RCID) { - case -1: - return nullptr; - default: - return AMDGPUGenRegisterInfo::getRegClass(RCID); - } -} - // Find reaching register definition MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 7b91ba7bc581f..813f6bb1a503a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -391,8 +391,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { MCRegister getExec() const; - const TargetRegisterClass *getRegClass(unsigned RCID) const; - // Find reaching register definition MachineInstr *findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, From a664f584f9596bf61aa9be35967dc578e58f6ca3 Mon Sep 17 00:00:00 2001 From: Hanumanth Date: Tue, 11 Nov 2025 18:36:41 -0500 Subject: [PATCH 03/32] [mlir][memref] Fix runtime verification for memref.subview for empty memref subviews (#166581) This PR applies the same fix from #166569 to `memref.subview`. That PR fixed the issue for `tensor.extract_slice`, and this one addresses the identical problem for `memref.subview`. The runtime verification for `memref.subview` incorrectly rejects valid empty subviews (size=0) starting at the memref boundary. **Example that demonstrates the issue:** ```mlir func.func @subview_with_empty_slice(%memref: memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, %dim_0: index, %dim_1: index, %dim_2: index, %offset: index) { // When called with: offset=10, dim_0=0, dim_1=4, dim_2=1 // Runtime verification fails: "offset 0 is out-of-bounds" %subview = memref.subview %memref[%offset, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>> to memref> return } ``` When `%offset=10` and `%dim_0=0`, we're creating an empty subview (zero elements along dimension 0) starting at the boundary. The current verification enforces `offset < dim_size`, which evaluates to `10 < 10` and fails. I feel this should be valid since no memory is accessed. **The fix:** Same as #166569 - make the offset check conditional on subview size: - Empty subview (size == 0): allow `0 <= offset <= dim_size` - Non-empty subview (size > 0): require `0 <= offset < dim_size` Please see #166569 for motivation and rationale. 
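As a quick reference, the per-dimension condition the pass now asserts can be summarized in plain Python (an illustrative sketch of the rule described above, not the actual implementation, which builds `arith`/`scf` ops as shown in the diff below):

```python
def subview_dim_in_bounds(offset: int, size: int, stride: int, dim_size: int) -> bool:
    """Per-dimension runtime check, sketched from the rules stated above."""
    if size == 0:
        # Empty subview: no element is accessed, so the offset may sit on the
        # boundary: 0 <= offset <= dim_size.
        return 0 <= offset <= dim_size
    # Non-empty subview: the offset and the last accessed position must both be
    # valid indices: 0 <= x < dim_size.
    last = offset + (size - 1) * stride
    return 0 <= offset < dim_size and 0 <= last < dim_size
```

For the example above (`offset=10`, `size=0`, `dim_size=10`) this evaluates to true, so the empty subview at the boundary is accepted.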
--- Co-authored-by: Hanumanth Hanumantharayappa --- .../Transforms/RuntimeOpVerification.cpp | 89 ++++++++++++------- .../MemRef/subview-runtime-verification.mlir | 15 ++++ 2 files changed, 70 insertions(+), 34 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp index 14152c5a1af0c..e5cc41e2c43ba 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp @@ -268,61 +268,82 @@ struct SubViewOpInterface MemRefType sourceType = subView.getSource().getType(); // For each dimension, assert that: - // 0 <= offset < dim_size - // 0 <= offset + (size - 1) * stride < dim_size + // For empty slices (size == 0) : 0 <= offset <= dim_size + // For non-empty slices (size > 0): 0 <= offset < dim_size + // 0 <= offset + (size - 1) * stride + // dim_size Value zero = arith::ConstantIndexOp::create(builder, loc, 0); Value one = arith::ConstantIndexOp::create(builder, loc, 1); + auto metadataOp = ExtractStridedMetadataOp::create(builder, loc, subView.getSource()); + for (int64_t i : llvm::seq(0, sourceType.getRank())) { - // Reset insertion point to before the operation for each dimension + // Reset insertion point to before the operation for each dimension. builder.setInsertionPoint(subView); + Value offset = getValueOrCreateConstantIndexOp( builder, loc, subView.getMixedOffsets()[i]); Value size = getValueOrCreateConstantIndexOp(builder, loc, subView.getMixedSizes()[i]); Value stride = getValueOrCreateConstantIndexOp( builder, loc, subView.getMixedStrides()[i]); - - // Verify that offset is in-bounds. Value dimSize = metadataOp.getSizes()[i]; - Value offsetInBounds = - generateInBoundsCheck(builder, loc, offset, zero, dimSize); - cf::AssertOp::create(builder, loc, offsetInBounds, + + // Verify that offset is in-bounds (conditional on slice size). + Value sizeIsZero = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::eq, size, zero); + auto offsetCheckIf = scf::IfOp::create( + builder, loc, sizeIsZero, + [&](OpBuilder &b, Location loc) { + // For empty slices, offset can be at the boundary: 0 <= offset <= + // dimSize. + Value offsetGEZero = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sge, offset, zero); + Value offsetLEDimSize = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sle, offset, dimSize); + Value emptyOffsetValid = + arith::AndIOp::create(b, loc, offsetGEZero, offsetLEDimSize); + scf::YieldOp::create(b, loc, emptyOffsetValid); + }, + [&](OpBuilder &b, Location loc) { + // For non-empty slices, offset must be a valid index: 0 <= offset + // dimSize. + Value offsetInBounds = + generateInBoundsCheck(b, loc, offset, zero, dimSize); + scf::YieldOp::create(b, loc, offsetInBounds); + }); + + Value offsetCondition = offsetCheckIf.getResult(0); + cf::AssertOp::create(builder, loc, offsetCondition, generateErrorMessage(op, "offset " + std::to_string(i) + " is out-of-bounds")); - // Only verify if size > 0 + // Verify that the slice endpoint is in-bounds (only for non-empty + // slices). Value sizeIsNonZero = arith::CmpIOp::create( builder, loc, arith::CmpIPredicate::sgt, size, zero); + auto ifOp = scf::IfOp::create( + builder, loc, sizeIsNonZero, + [&](OpBuilder &b, Location loc) { + // Verify that slice does not run out-of-bounds. 
+ Value sizeMinusOne = arith::SubIOp::create(b, loc, size, one); + Value sizeMinusOneTimesStride = + arith::MulIOp::create(b, loc, sizeMinusOne, stride); + Value lastPos = + arith::AddIOp::create(b, loc, offset, sizeMinusOneTimesStride); + Value lastPosInBounds = + generateInBoundsCheck(b, loc, lastPos, zero, dimSize); + scf::YieldOp::create(b, loc, lastPosInBounds); + }, + [&](OpBuilder &b, Location loc) { + Value trueVal = + arith::ConstantOp::create(b, loc, b.getBoolAttr(true)); + scf::YieldOp::create(b, loc, trueVal); + }); - auto ifOp = scf::IfOp::create(builder, loc, builder.getI1Type(), - sizeIsNonZero, /*withElseRegion=*/true); - - // Populate the "then" region (for size > 0). - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - - // Verify that slice does not run out-of-bounds. - Value sizeMinusOne = arith::SubIOp::create(builder, loc, size, one); - Value sizeMinusOneTimesStride = - arith::MulIOp::create(builder, loc, sizeMinusOne, stride); - Value lastPos = - arith::AddIOp::create(builder, loc, offset, sizeMinusOneTimesStride); - Value lastPosInBounds = - generateInBoundsCheck(builder, loc, lastPos, zero, dimSize); - - scf::YieldOp::create(builder, loc, lastPosInBounds); - - // Populate the "else" region (for size == 0). - builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - Value trueVal = - arith::ConstantOp::create(builder, loc, builder.getBoolAttr(true)); - scf::YieldOp::create(builder, loc, trueVal); - - builder.setInsertionPointAfter(ifOp); Value finalCondition = ifOp.getResult(0); - cf::AssertOp::create( builder, loc, finalCondition, generateErrorMessage(op, diff --git a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir index 84875675ac3d0..09cfee16ccd00 100644 --- a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir @@ -50,6 +50,17 @@ func.func @subview_zero_size_dim(%memref: memref<10x4x1xf32, strided<[?, ?, ?], return } +func.func @subview_with_empty_slice(%memref: memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, + %dim_0: index, + %dim_1: index, + %dim_2: index, + %offset: index) { + %subview = memref.subview %memref[%offset, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : + memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>> to + memref> + return +} + func.func @main() { %0 = arith.constant 0 : index @@ -127,5 +138,9 @@ func.func @main() { func.call @subview_zero_size_dim(%alloca_10x4x1_dyn_stride, %dim_0, %dim_1, %dim_2) : (memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, index, index, index) -> () + // CHECK-NOT: ERROR: Runtime op verification failed + %offset = arith.constant 10 : index + func.call @subview_with_empty_slice(%alloca_10x4x1_dyn_stride, %dim_0, %dim_1, %dim_2, %offset) + : (memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, index, index, index, index) -> () return } From 81964597f9918e1f294f5b9de27ee662005b8c58 Mon Sep 17 00:00:00 2001 From: Hanumanth Date: Tue, 11 Nov 2025 18:37:15 -0500 Subject: [PATCH 04/32] [mlir][tensor] Fix runtime verification for tensor.extract_slice for empty tensor slices (#166569) I hit another runtime verification issue (similar to https://github.com/llvm/llvm-project/pull/164878) while working with TFLite models. The verifier is incorrectly rejecting `tensor.extract_slice` operations when extracting an empty slice (size=0) that starts exactly at the tensor boundary. 
The current runtime verification unconditionally enforces `offset < dim_size`. This makes sense for non-empty slices, but it's too strict for empty slices, causing false positives that lead to spurious runtime assertions. **Simple example that demonstrates the issue:** ```mlir func.func @extract_empty_slice(%tensor: tensor, %offset: index, %size: index) { // When called with: tensor size=10, offset=10, size=0 // Runtime verification fails: "offset 0 is out-of-bounds" %slice = tensor.extract_slice %tensor[%offset] [%size] [1] : tensor to tensor return } ``` For the above example, the check evaluates `10 < 10` which is false, so verification fails. However, I believe this operation should be valid - we're extracting zero elements, so there's no actual out-of-bounds access. **Real-world repro from the TensorFlow Lite models:** This issue manifests while lowering TFLite models and a lot of our system tests are failing due to this. Here's a simplified version showing the problematic pattern: In this code, `%extracted_slice_0` becomes an empty tensor when SSA value `%15` reaches 10 (on the final loop iteration), making `%16 = 0`. The operation extracts zero elements along dimension 0, which is semantically valid but fails runtime verification. ```mlir func.func @simplified_repro_from_tensorflowlite_model(%arg0: tensor<10x4x1xf32>) -> tensor<10x4x1xf32> { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index %c10 = arith.constant 10 : index %c-1 = arith.constant -1 : index %0 = "tosa.const"() <{values = dense<0> : tensor}> : () -> tensor %1 = "tosa.const"() <{values = dense<1> : tensor}> : () -> tensor %2 = "tosa.const"() <{values = dense<10> : tensor}> : () -> tensor %3 = "tosa.const"() <{values = dense<-1> : tensor<2xi32>}> : () -> tensor<2xi32> %4 = "tosa.const"() <{values = dense<0> : tensor<2xi32>}> : () -> tensor<2xi32> %5 = "tosa.const"() <{values = dense<0.000000e+00> : tensor<1x4x1xf32>}> : () -> tensor<1x4x1xf32> %c4_1 = tosa.const_shape {values = dense<1> : tensor<1xindex>} : () -> !tosa.shape<1> %6:2 = scf.while (%arg1 = %0, %arg2 = %arg0) : (tensor, tensor<10x4x1xf32>) -> (tensor, tensor<10x4x1xf32>) { %7 = tosa.greater %2, %arg1 : (tensor, tensor) -> tensor %extracted = tensor.extract %7[] : tensor scf.condition(%extracted) %arg1, %arg2 : tensor, tensor<10x4x1xf32> } do { ^bb0(%arg1: tensor, %arg2: tensor<10x4x1xf32>): %7 = tosa.add %arg1, %1 : (tensor, tensor) -> tensor // First slice %8 = tosa.reshape %arg1, %c4_1 : (tensor, !tosa.shape<1>) -> tensor<1xi32> %9 = tosa.concat %8, %3 {axis = 0 : i32} : (tensor<1xi32>, tensor<2xi32>) -> tensor<3xi32> %extracted_0 = tensor.extract %9[%c0] : tensor<3xi32> %10 = index.casts %extracted_0 : i32 to index %11 = arith.cmpi eq, %10, %c-1 : index %12 = arith.select %11, %c10, %10 : index %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [%12, 4, 1] [1, 1, 1] : tensor<10x4x1xf32> to tensor // Second slice - this is where the failure occurs %13 = tosa.reshape %7, %c4_1 : (tensor, !tosa.shape<1>) -> tensor<1xi32> %14 = tosa.concat %13, %4 {axis = 0 : i32} : (tensor<1xi32>, tensor<2xi32>) -> tensor<3xi32> %extracted_1 = tensor.extract %14[%c0] : tensor<3xi32> %15 = index.castu %extracted_1 : i32 to index %16 = arith.subi %c10, %15 : index // size = 10 - offset %extracted_2 = tensor.extract %14[%c1] : tensor<3xi32> %17 = index.castu %extracted_2 : i32 to index %extracted_3 = tensor.extract %14[%c2] : tensor<3xi32> %18 = index.castu %extracted_3 : i32 to index // On the last loop iteration: %15=10, %16=0 // 
%extracted_slice_0 becomes an empty tensor // Runtime verification fails: "offset 0 is out-of-bounds" %extracted_slice_0 = tensor.extract_slice %arg2[%15, %17, %18] [%16, 4, 1] [1, 1, 1] : tensor<10x4x1xf32> to tensor %19 = tosa.concat %extracted_slice, %5, %extracted_slice_0 {axis = 0 : i32} : (tensor, tensor<1x4x1xf32>, tensor) -> tensor<10x4x1xf32> scf.yield %7, %19 : tensor, tensor<10x4x1xf32> } return %6#1 : tensor<10x4x1xf32> } ``` **The fix:** Make the offset check conditional on slice size: - Empty slice (size == 0): allow `0 <= offset <= dim_size` - Non-empty slice (size > 0): require `0 <= offset < dim_size` **Question for reviewers:** Should we also relax the static verifier to allow this edge case? Currently, the static verifier rejects the following IR: ```mlir %tensor = arith.constant dense<1.0> : tensor<10xf32> %slice = tensor.extract_slice %tensor[10] [0] [1] : tensor<10xf32> to tensor<0xf32> ``` Since we're allowing it at runtime for dynamic shapes, it seems inconsistent to reject it statically. However, I wanted to get feedback before making that change - this PR focuses only on the runtime verification fix for dynamic shapes. P.S. We have a similar issue with `memref.subview`. I will send a separate patch for the issue. Co-authored-by: Hanumanth Hanumantharayappa --- .../Transforms/RuntimeOpVerification.cpp | 85 ++++++++++++------- .../extract_slice-runtime-verification.mlir | 9 ++ 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp index 753cb95b1c906..d35f458cbdb36 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp @@ -155,13 +155,15 @@ struct ExtractSliceOpInterface RankedTensorType sourceType = extractSliceOp.getSource().getType(); // For each dimension, assert that: - // 0 <= offset < dim_size - // 0 <= offset + (size - 1) * stride < dim_size + // For empty slices (size == 0) : 0 <= offset <= dim_size + // For non-empty slices (size > 0): 0 <= offset < dim_size + // 0 <= offset + (size - 1) * stride < + // dim_size Value zero = arith::ConstantIndexOp::create(builder, loc, 0); Value one = arith::ConstantIndexOp::create(builder, loc, 1); for (int64_t i : llvm::seq(0, sourceType.getRank())) { - // Reset insertion point to before the operation for each dimension + builder.setInsertionPoint(extractSliceOp); Value offset = getValueOrCreateConstantIndexOp( @@ -170,46 +172,63 @@ struct ExtractSliceOpInterface builder, loc, extractSliceOp.getMixedSizes()[i]); Value stride = getValueOrCreateConstantIndexOp( builder, loc, extractSliceOp.getMixedStrides()[i]); - - // Verify that offset is in-bounds. Value dimSize = builder.createOrFold( loc, extractSliceOp.getSource(), i); - Value offsetInBounds = - generateInBoundsCheck(builder, loc, offset, zero, dimSize); - cf::AssertOp::create(builder, loc, offsetInBounds, + + // Verify that offset is in-bounds (conditional on slice size). + Value sizeIsZero = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::eq, size, zero); + auto offsetCheckIf = scf::IfOp::create( + builder, loc, sizeIsZero, + [&](OpBuilder &b, Location loc) { + // For empty slices, offset can be at the boundary: 0 <= offset <= + // dimSize. 
+ Value offsetGEZero = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sge, offset, zero); + Value offsetLEDimSize = arith::CmpIOp::create( + b, loc, arith::CmpIPredicate::sle, offset, dimSize); + Value emptyOffsetValid = + arith::AndIOp::create(b, loc, offsetGEZero, offsetLEDimSize); + scf::YieldOp::create(b, loc, emptyOffsetValid); + }, + [&](OpBuilder &b, Location loc) { + // For non-empty slices, offset must be a valid index: 0 <= offset < + // dimSize. + Value offsetInBounds = + generateInBoundsCheck(b, loc, offset, zero, dimSize); + scf::YieldOp::create(b, loc, offsetInBounds); + }); + + Value offsetCondition = offsetCheckIf.getResult(0); + cf::AssertOp::create(builder, loc, offsetCondition, generateErrorMessage(op, "offset " + std::to_string(i) + " is out-of-bounds")); - // Only verify if size > 0 + // Verify that the slice endpoint is in-bounds (only for non-empty + // slices). Value sizeIsNonZero = arith::CmpIOp::create( builder, loc, arith::CmpIPredicate::sgt, size, zero); + auto ifOp = scf::IfOp::create( + builder, loc, sizeIsNonZero, + [&](OpBuilder &b, Location loc) { + // Verify that slice does not run out-of-bounds. + Value sizeMinusOne = arith::SubIOp::create(b, loc, size, one); + Value sizeMinusOneTimesStride = + arith::MulIOp::create(b, loc, sizeMinusOne, stride); + Value lastPos = + arith::AddIOp::create(b, loc, offset, sizeMinusOneTimesStride); + Value lastPosInBounds = + generateInBoundsCheck(b, loc, lastPos, zero, dimSize); + scf::YieldOp::create(b, loc, lastPosInBounds); + }, + [&](OpBuilder &b, Location loc) { + Value trueVal = + arith::ConstantOp::create(b, loc, b.getBoolAttr(true)); + scf::YieldOp::create(b, loc, trueVal); + }); - auto ifOp = scf::IfOp::create(builder, loc, builder.getI1Type(), - sizeIsNonZero, /*withElseRegion=*/true); - - // Populate the "then" region (for size > 0). - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - - // Verify that slice does not run out-of-bounds. - Value sizeMinusOne = arith::SubIOp::create(builder, loc, size, one); - Value sizeMinusOneTimesStride = - arith::MulIOp::create(builder, loc, sizeMinusOne, stride); - Value lastPos = - arith::AddIOp::create(builder, loc, offset, sizeMinusOneTimesStride); - Value lastPosInBounds = - generateInBoundsCheck(builder, loc, lastPos, zero, dimSize); - scf::YieldOp::create(builder, loc, lastPosInBounds); - - // Populate the "else" region (for size == 0). 
- builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - Value trueVal = - arith::ConstantOp::create(builder, loc, builder.getBoolAttr(true)); - scf::YieldOp::create(builder, loc, trueVal); - - builder.setInsertionPointAfter(ifOp); Value finalCondition = ifOp.getResult(0); - cf::AssertOp::create( builder, loc, finalCondition, generateErrorMessage( diff --git a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir index a77fa310a3699..745eea37f7fca 100644 --- a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir @@ -39,6 +39,11 @@ func.func @extract_slice_zero_size_dim(%arg0: tensor<10x4x1xf32>, %dim_0: index, return } +func.func @extract_slice_empty_tensor(%arg0: tensor<10x4x1xf32>, %dim_0: index, %dim_1: index, %dim_2: index, %offset: index) { + tensor.extract_slice %arg0[%offset, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : tensor<10x4x1xf32> to tensor + return +} + func.func @main() { %0 = arith.constant 0 : index @@ -115,5 +120,9 @@ func.func @main() { %dim_2 = arith.constant 1 : index func.call @extract_slice_zero_size_dim(%cst10x4x1xf32, %dim_0, %dim_1, %dim_2) : (tensor<10x4x1xf32>, index, index, index) -> () + // CHECK-NOT: ERROR: Runtime op verification failed + %offset = arith.constant 10 : index + func.call @extract_slice_empty_tensor(%cst10x4x1xf32, %dim_0, %dim_1, %dim_2, %offset) : (tensor<10x4x1xf32>, index, index, index, index) -> () + return } From b07f8b0d98d9679ef2af91dc2a8fa8d7155157e6 Mon Sep 17 00:00:00 2001 From: anoopkg6 Date: Tue, 11 Nov 2025 17:46:30 -0600 Subject: [PATCH 05/32] [JITLINK] Fix large offset issue (#167600) Removed large offset test. It caused issue with ARM 32-bit because of large offset. 
Co-authored-by: anoopkg6 --- .../JITLink/systemz/ELF_systemz_reloc_pcdbl.s | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s index efe8357e76bef..6a7ca8bd6e2a6 100644 --- a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s @@ -1,33 +1,34 @@ # REQUIRES: system-linux # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=0xffe -defsym OFF16=4 -defsym OFF24=6 \ -# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=0xffe -abs OFF16=4 -abs OFF24=6 \ -# RUN: -abs OFF32=6 -check=%s %t.o +# RUN: -check=%s %t.o # # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=6 -defsym OFF16=0xfffe -defsym OFF24=6 \ -# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=0xfffe -abs OFF24=6 \ -# RUN: -abs OFF32=6 -check=%s %t.o +# RUN: -check=%s %t.o # # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=6 -defsym OFF16=4 -defsym OFF24=0xfffffe \ -# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=4 -abs OFF24=0xfffffe \ -# RUN: -abs OFF32=6 -check=%s %t.o +# RUN: -check=%s %t.o # # RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ # RUN: -defsym OFF12=6 -defsym OFF16=4 -defsym OFF24=6 \ -# RUN: -defsym OFF32=0xffffffc8 -filetype=obj -o %t.o %s +# RUN: -filetype=obj -o %t.o %s # # RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=4 -abs OFF24=6 \ -# RUN: -abs OFF32=0xffffffc8 -check=%s %t.o +# RUN: -check=%s %t.o -# Check R_390_PC*dbl relocations. +# Check R_390_PC*dbl relocations. R_390_PC32_DBL test is in +# ELF_systemz_reloc_abs32.s because of large offset. .text .section .text.main @@ -49,17 +50,6 @@ test_pc16dbl: jne test_pc16dbl .size test_pc16dbl,.-test_pc16dbl -# R_390_PC32DBL -# jitlink-check: *{4}(test_pc32dbl + 2) = (OFF32 >> 1) - .globl test_pc32dbl - .p2align 3 -test_pc32dbl: - jge .Lpc32dbl - .space OFF32 - 6 -.Lpc32dbl: - jgne test_pc32dbl - .size test_pc32dbl,.-test_pc32dbl - # R_390_PC12DBL # jitlink-check: ((*{2} (test_pc12dbl + 1)) & 0x0fff) = (OFF12 >> 1) .globl test_pc12dbl From ce175995533c7407a00d72633f867bf93d4d4499 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Tue, 11 Nov 2025 15:49:45 -0800 Subject: [PATCH 06/32] [MLIR][Python] Add wrappers for scf.index_switch (#167458) The C++ index switch op has utilities for `getCaseBlock(int i)` and `getDefaultBlock()`, so these have been added. Optional body builder args have been added: one for the default case and one for the switch cases. 
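A minimal usage sketch of the new wrappers (mirroring the `testIndexSwitchWithBodyBuilders` test added below; it assumes an enclosing MLIR `Context`/`Module`/`InsertionPoint` as set up by the test harness):

```python
from mlir.dialects import arith, scf
from mlir.extras import types as T


def build_index_switch(index_value):
    i32 = T.i32()

    def default_body(switch_op):
        # Built inside the default region.
        scf.yield_([arith.constant(i32, 1)])

    def case_body(switch_op, case_index, case_value):
        # Built inside case region `case_index`; `case_value` is the matched index.
        scf.yield_([arith.constant(i32, case_value)])

    # scf.index_switch returns the op's result here; when constructing
    # IndexSwitchOp directly, its blocks stay reachable via default_block
    # and case_block(i).
    return scf.index_switch(
        results=[i32],
        arg=index_value,
        cases=range(3),
        case_body_builder=case_body,
        default_body_builder=default_body,
    )
```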
--- mlir/python/mlir/dialects/scf.py | 75 ++++++++++++++++++ mlir/test/python/dialects/scf.py | 126 ++++++++++++++++++++++++++++++- mlir/test/python/ir/operation.py | 4 +- 3 files changed, 198 insertions(+), 7 deletions(-) diff --git a/mlir/python/mlir/dialects/scf.py b/mlir/python/mlir/dialects/scf.py index 678ceeebac204..9e22df3dd50a9 100644 --- a/mlir/python/mlir/dialects/scf.py +++ b/mlir/python/mlir/dialects/scf.py @@ -12,6 +12,7 @@ from ._ods_common import ( get_op_result_or_value as _get_op_result_or_value, get_op_results_or_values as _get_op_results_or_values, + get_op_result_or_op_results as _get_op_result_or_op_results, _cext as _ods_cext, ) except ImportError as e: @@ -254,3 +255,77 @@ def for_( yield iv, iter_args[0], for_op.results[0] else: yield iv + + +@_ods_cext.register_operation(_Dialect, replace=True) +class IndexSwitchOp(IndexSwitchOp): + __doc__ = IndexSwitchOp.__doc__ + + def __init__( + self, + results, + arg, + cases, + case_body_builder=None, + default_body_builder=None, + loc=None, + ip=None, + ): + cases = DenseI64ArrayAttr.get(cases) + super().__init__( + results, arg, cases, num_caseRegions=len(cases), loc=loc, ip=ip + ) + for region in self.regions: + region.blocks.append() + + if default_body_builder is not None: + with InsertionPoint(self.default_block): + default_body_builder(self) + + if case_body_builder is not None: + for i, case in enumerate(cases): + with InsertionPoint(self.case_block(i)): + case_body_builder(self, i, self.cases[i]) + + @property + def default_region(self) -> Region: + return self.regions[0] + + @property + def default_block(self) -> Block: + return self.default_region.blocks[0] + + @property + def case_regions(self) -> Sequence[Region]: + return self.regions[1:] + + def case_region(self, i: int) -> Region: + return self.case_regions[i] + + @property + def case_blocks(self) -> Sequence[Block]: + return [region.blocks[0] for region in self.case_regions] + + def case_block(self, i: int) -> Block: + return self.case_regions[i].blocks[0] + + +def index_switch( + results, + arg, + cases, + case_body_builder=None, + default_body_builder=None, + loc=None, + ip=None, +) -> Union[OpResult, OpResultList, IndexSwitchOp]: + op = IndexSwitchOp( + results=results, + arg=arg, + cases=cases, + case_body_builder=case_body_builder, + default_body_builder=default_body_builder, + loc=loc, + ip=ip, + ) + return _get_op_result_or_op_results(op) diff --git a/mlir/test/python/dialects/scf.py b/mlir/test/python/dialects/scf.py index 62d11d5e189c8..0c0c9b986562b 100644 --- a/mlir/test/python/dialects/scf.py +++ b/mlir/test/python/dialects/scf.py @@ -1,10 +1,14 @@ # RUN: %PYTHON %s | FileCheck %s from mlir.ir import * -from mlir.dialects import arith -from mlir.dialects import func -from mlir.dialects import memref -from mlir.dialects import scf +from mlir.extras import types as T +from mlir.dialects import ( + arith, + func, + memref, + scf, + cf, +) from mlir.passmanager import PassManager @@ -355,3 +359,117 @@ def simple_if_else(cond): # CHECK: scf.yield %[[TWO]], %[[THREE]] # CHECK: arith.addi %[[RET]]#0, %[[RET]]#1 # CHECK: return + + +@constructAndPrintInModule +def testIndexSwitch(): + i32 = T.i32() + + @func.FuncOp.from_py_func(T.index(), results=[i32]) + def index_switch(index): + c1 = arith.constant(i32, 1) + c0 = arith.constant(i32, 0) + value = arith.constant(i32, 5) + switch_op = scf.IndexSwitchOp([i32], index, range(3)) + + assert switch_op.regions[0] == switch_op.default_region + assert switch_op.regions[1] == switch_op.case_regions[0] + assert 
switch_op.regions[1] == switch_op.case_region(0) + assert len(switch_op.case_regions) == 3 + assert len(switch_op.regions) == 4 + + with InsertionPoint(switch_op.default_block): + cf.assert_(arith.constant(T.bool(), 0), "Whoops!") + scf.yield_([c1]) + + for i, block in enumerate(switch_op.case_blocks): + with InsertionPoint(block): + scf.yield_([arith.constant(i32, i)]) + + func.return_([switch_op.results[0]]) + + return index_switch + + +# CHECK-LABEL: func.func @index_switch( +# CHECK-SAME: %[[ARG0:.*]]: index) -> i32 { +# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : i32 +# CHECK: %[[CONSTANT_1:.*]] = arith.constant 0 : i32 +# CHECK: %[[CONSTANT_2:.*]] = arith.constant 5 : i32 +# CHECK: %[[INDEX_SWITCH_0:.*]] = scf.index_switch %[[ARG0]] -> i32 +# CHECK: case 0 { +# CHECK: %[[CONSTANT_3:.*]] = arith.constant 0 : i32 +# CHECK: scf.yield %[[CONSTANT_3]] : i32 +# CHECK: } +# CHECK: case 1 { +# CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : i32 +# CHECK: scf.yield %[[CONSTANT_4]] : i32 +# CHECK: } +# CHECK: case 2 { +# CHECK: %[[CONSTANT_5:.*]] = arith.constant 2 : i32 +# CHECK: scf.yield %[[CONSTANT_5]] : i32 +# CHECK: } +# CHECK: default { +# CHECK: %[[CONSTANT_6:.*]] = arith.constant false +# CHECK: cf.assert %[[CONSTANT_6]], "Whoops!" +# CHECK: scf.yield %[[CONSTANT_0]] : i32 +# CHECK: } +# CHECK: return %[[INDEX_SWITCH_0]] : i32 +# CHECK: } + + +@constructAndPrintInModule +def testIndexSwitchWithBodyBuilders(): + i32 = T.i32() + + @func.FuncOp.from_py_func(T.index(), results=[i32]) + def index_switch(index): + c1 = arith.constant(i32, 1) + c0 = arith.constant(i32, 0) + value = arith.constant(i32, 5) + + def default_body_builder(switch_op): + cf.assert_(arith.constant(T.bool(), 0), "Whoops!") + scf.yield_([c1]) + + def case_body_builder(switch_op, case_index: int, case_value: int): + scf.yield_([arith.constant(i32, case_value)]) + + result = scf.index_switch( + results=[i32], + arg=index, + cases=range(3), + case_body_builder=case_body_builder, + default_body_builder=default_body_builder, + ) + + func.return_([result]) + + return index_switch + + +# CHECK-LABEL: func.func @index_switch( +# CHECK-SAME: %[[ARG0:.*]]: index) -> i32 { +# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : i32 +# CHECK: %[[CONSTANT_1:.*]] = arith.constant 0 : i32 +# CHECK: %[[CONSTANT_2:.*]] = arith.constant 5 : i32 +# CHECK: %[[INDEX_SWITCH_0:.*]] = scf.index_switch %[[ARG0]] -> i32 +# CHECK: case 0 { +# CHECK: %[[CONSTANT_3:.*]] = arith.constant 0 : i32 +# CHECK: scf.yield %[[CONSTANT_3]] : i32 +# CHECK: } +# CHECK: case 1 { +# CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : i32 +# CHECK: scf.yield %[[CONSTANT_4]] : i32 +# CHECK: } +# CHECK: case 2 { +# CHECK: %[[CONSTANT_5:.*]] = arith.constant 2 : i32 +# CHECK: scf.yield %[[CONSTANT_5]] : i32 +# CHECK: } +# CHECK: default { +# CHECK: %[[CONSTANT_6:.*]] = arith.constant false +# CHECK: cf.assert %[[CONSTANT_6]], "Whoops!" 
+# CHECK: scf.yield %[[CONSTANT_0]] : i32 +# CHECK: } +# CHECK: return %[[INDEX_SWITCH_0]] : i32 +# CHECK: } diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py index 1bdd345d98c05..66ba5d28e49b2 100644 --- a/mlir/test/python/ir/operation.py +++ b/mlir/test/python/ir/operation.py @@ -1212,9 +1212,7 @@ def testIndexSwitch(): @func.FuncOp.from_py_func(T.index()) def index_switch(index): c1 = arith.constant(i32, 1) - switch_op = scf.IndexSwitchOp( - results_=[i32], arg=index, cases=range(3), num_caseRegions=3 - ) + switch_op = scf.IndexSwitchOp(results=[i32], arg=index, cases=range(3)) assert len(switch_op.regions) == 4 assert len(switch_op.regions[2:]) == 2 From 95dfe79cac47f2a4280b5b67e193c94c412ae600 Mon Sep 17 00:00:00 2001 From: Prabhu Rajasekaran Date: Tue, 11 Nov 2025 16:04:39 -0800 Subject: [PATCH 07/32] [MachO] Fix test failure. (#167598) Add requires to not run `invalid-section-index.s` test in non aarch64 supported environments. --- llvm/test/MC/MachO/invalid-section-index.s | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/test/MC/MachO/invalid-section-index.s b/llvm/test/MC/MachO/invalid-section-index.s index 55a0ce5b40ea7..104e8a82e43af 100644 --- a/llvm/test/MC/MachO/invalid-section-index.s +++ b/llvm/test/MC/MachO/invalid-section-index.s @@ -1,6 +1,8 @@ +// REQUIRES: aarch64-registered-target + /// Test that when there are more than 255 sections, error is shown specifying too many sections. -// RUN: not llvm-mc -filetype=obj -triple arm64-apple-macos %s -o - 2>&1 | FileCheck %s --check-prefix=MACHOERROR +// RUN: not llvm-mc -filetype=obj -triple arm64-apple-darwin %s -o - 2>&1 | FileCheck %s --check-prefix=MACHOERROR // MACHOERROR: error: Too many sections! // MACHOERROR-NEXT: error: Invalid section index! From fcba3040107944604904aeb146c26ec0628160f4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 16:05:05 -0800 Subject: [PATCH 08/32] AMDGPU: Remove override of TargetInstrInfo::getRegClass (#159886) This should not be overridable and the special case hacks have been replaced with RegClassByHwMode --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 12 ------------ llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 --- 2 files changed, 15 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4c4625b8834ee..6e39a2de9b805 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6023,18 +6023,6 @@ SIInstrInfo::getWholeWaveFunctionSetup(MachineFunction &MF) const { llvm_unreachable("Couldn't find SI_SETUP_WHOLE_WAVE_FUNC instruction"); } -// FIXME: This should not be an overridable function. All subtarget dependent -// operand modifications should go through isLookupRegClassByHwMode in the -// generic handling. -const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, - unsigned OpNum) const { - if (OpNum >= TID.getNumOperands()) - return nullptr; - const MCOperandInfo &OpInfo = TID.operands()[OpNum]; - int16_t RegClass = getOpRegClassID(OpInfo); - return RegClass < 0 ? 
nullptr : RI.getRegClass(RegClass); -} - const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, unsigned OpNo) const { const MCInstrDesc &Desc = get(MI.getOpcode()); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index c048b85b1e99a..2ecd94186e1e0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1619,9 +1619,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { /// Return true if this opcode should not be used by codegen. bool isAsmOnlyOpcode(int MCOp) const; - const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, - unsigned OpNum) const override; - void fixImplicitOperands(MachineInstr &MI) const; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, From 362119d03cb5156599825cd43ceff65b4dd54068 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 12 Nov 2025 00:11:10 +0000 Subject: [PATCH 09/32] [gn build] Port 5c3323a59fd2 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b36466b3fac8b..3ca6b7fa0565e 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1712,7 +1712,6 @@ if (current_toolchain == default_toolchain) { "sstream", "stack", "stdatomic.h", - "stdbool.h", "stddef.h", "stdexcept", "stdio.h", From 2acd65288696c854028d2e61482e0b884af44062 Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Tue, 11 Nov 2025 16:31:57 -0800 Subject: [PATCH 10/32] Fix lldb-dap non-leaf frame source resolution issue (#165944) Summary ------- While dogfooding lldb-dap, I observed that VSCode frequently displays certain stack frames as greyed out. Although these frames have valid debug information, double-clicking them shows disassembly instead of source code. However, running `bt` from the LLDB command line correctly displays source file and line information for these same frames, indicating this is an lldb-dap specific issue. Root Cause ---------- Investigation revealed that `DAP::ResolveSource()` incorrectly uses a frame's PC address directly to determine whether valid source line information exists. This approach works for leaf frames, but fails for non-leaf (caller) frames where the PC points to the return address immediately after a call instruction. This return address may fall into compiler-generated code with no associated line information, even though the actual call site has valid source location data. The correct approach is to use the symbol context's line entry, which LLDB resolves by effectively checking PC-1 for non-leaf frames, properly identifying the line information for the call instruction rather than the return address. Testing ------- Manually tested with VSCode debugging sessions on production workloads. Verified that non-leaf frames now correctly display source code instead of disassembly view. 
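For reference, the same distinction expressed with the lldb Python SB API (an illustrative sketch only; the actual fix is in the C++ code shown in the diff below):

```python
import lldb


def frame_has_source_info(frame: lldb.SBFrame) -> bool:
    # frame.GetLineEntry() resolves through the symbol context, which already
    # backs up to the call instruction (effectively PC-1) for non-leaf frames,
    # so it reflects the call site rather than the return address.
    line_entry = frame.GetLineEntry()
    return line_entry.IsValid() and line_entry.GetLine() != 0


# The old behavior effectively asked the raw PC for a line entry, which for a
# non-leaf frame can land in compiler-generated code with no line info:
#   frame.GetPCAddress().GetLineEntry()  # may be invalid or line 0 here
```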
Before the change symptom: image And here is after the fix: image --------- Co-authored-by: Jeffrey Tan --- .../stackTraceCompilerGeneratedCode/Makefile | 3 + ...TestDAP_stackTraceCompilerGeneratedCode.py | 66 +++++++++++++++++++ .../stackTraceCompilerGeneratedCode/main.c | 19 ++++++ lldb/tools/lldb-dap/DAP.cpp | 12 ++-- lldb/tools/lldb-dap/ProtocolUtils.cpp | 7 +- lldb/tools/lldb-dap/ProtocolUtils.h | 3 +- 6 files changed, 100 insertions(+), 10 deletions(-) create mode 100644 lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile create mode 100644 lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py create mode 100644 lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c diff --git a/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py new file mode 100644 index 0000000000000..4ddf92402ad8a --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/TestDAP_stackTraceCompilerGeneratedCode.py @@ -0,0 +1,66 @@ +""" +Test lldb-dap stackTrace request for compiler generated code +""" + +import os + +import lldbdap_testcase +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class TestDAP_stackTraceCompilerGeneratedCode(lldbdap_testcase.DAPTestCaseBase): + def test_non_leaf_frame_compiler_generate_code(self): + """ + Test that non-leaf frames with compiler-generated code are properly resolved. + + This test verifies that LLDB correctly handles stack frames containing + compiler-generated code (code without valid source location information). + When a non-leaf frame contains compiler-generated code immediately after a + call instruction, LLDB should resolve the frame's source location to the + call instruction's line, rather than to the compiler-generated code that + follows, which lacks proper symbolication information. 
+ """ + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.c" + + # Set breakpoint inside bar() function + lines = [line_number(source, "// breakpoint here")] + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.assertEqual( + len(breakpoint_ids), len(lines), "expect correct number of breakpoints" + ) + + self.continue_to_breakpoints(breakpoint_ids) + + # Get the stack frames: [0] = bar(), [1] = foo(), [2] = main() + stack_frames = self.get_stackFrames() + self.assertGreater(len(stack_frames), 2, "Expected more than 2 stack frames") + + # Examine the foo() frame (stack_frames[1]) + # This is the critical frame containing compiler-generated code + foo_frame = stack_frames[1] + + # Verify that the frame's line number points to the bar() call, + # not to the compiler-generated code after it + foo_call_bar_source_line = foo_frame.get("line") + self.assertEqual( + foo_call_bar_source_line, + line_number(source, "foo call bar"), + "Expected foo call bar to be the source line of the frame", + ) + + # Verify the source file name is correctly resolved + foo_source_name = foo_frame.get("source", {}).get("name") + self.assertEqual( + foo_source_name, "main.c", "Expected foo source name to be main.c" + ) + + # When lldb fails to symbolicate a frame it will emit a fake assembly + # source with path of format ` or `
with + # sourceReference to retrieve disassembly source file. + # Verify that this didn't happen - the path should be a real file path. + foo_path = foo_frame.get("source", {}).get("path") + self.assertNotIn("`", foo_path, "Expected foo source path to not contain `") + self.continue_to_exit() diff --git a/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c new file mode 100644 index 0000000000000..dd3fcc295d492 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stackTraceCompilerGeneratedCode/main.c @@ -0,0 +1,19 @@ +void bar() { + int val = 32; // breakpoint here +} + +void at_line_zero() {} + +int foo(); + +int main(int argc, char const *argv[]) { + foo(); + return 0; +} + +int foo() { + bar(); // foo call bar +#line 0 "test.cpp" + at_line_zero(); + return 0; +} diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index f009a902f79e7..11aed33886edb 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -657,18 +657,20 @@ std::optional DAP::ResolveSource(const lldb::SBFrame &frame) { if (!frame.IsValid()) return std::nullopt; - const lldb::SBAddress frame_pc = frame.GetPCAddress(); - if (DisplayAssemblySource(debugger, frame_pc)) + const lldb::SBLineEntry frame_line_entry = frame.GetLineEntry(); + if (DisplayAssemblySource(debugger, frame_line_entry)) { + const lldb::SBAddress frame_pc = frame.GetPCAddress(); return ResolveAssemblySource(frame_pc); + } - return CreateSource(frame.GetLineEntry().GetFileSpec()); + return CreateSource(frame_line_entry.GetFileSpec()); } std::optional DAP::ResolveSource(lldb::SBAddress address) { - if (DisplayAssemblySource(debugger, address)) + lldb::SBLineEntry line_entry = GetLineEntryForAddress(target, address); + if (DisplayAssemblySource(debugger, line_entry)) return ResolveAssemblySource(address); - lldb::SBLineEntry line_entry = GetLineEntryForAddress(target, address); if (!line_entry.IsValid()) return std::nullopt; diff --git a/lldb/tools/lldb-dap/ProtocolUtils.cpp b/lldb/tools/lldb-dap/ProtocolUtils.cpp index 868c67ca72986..acf31b03f7af0 100644 --- a/lldb/tools/lldb-dap/ProtocolUtils.cpp +++ b/lldb/tools/lldb-dap/ProtocolUtils.cpp @@ -27,7 +27,7 @@ using namespace lldb_dap::protocol; namespace lldb_dap { static bool ShouldDisplayAssemblySource( - lldb::SBAddress address, + lldb::SBLineEntry line_entry, lldb::StopDisassemblyType stop_disassembly_display) { if (stop_disassembly_display == lldb::eStopDisassemblyTypeNever) return false; @@ -37,7 +37,6 @@ static bool ShouldDisplayAssemblySource( // A line entry of 0 indicates the line is compiler generated i.e. no source // file is associated with the frame. 
- auto line_entry = address.GetLineEntry(); auto file_spec = line_entry.GetFileSpec(); if (!file_spec.IsValid() || line_entry.GetLine() == 0 || line_entry.GetLine() == LLDB_INVALID_LINE_NUMBER) @@ -174,10 +173,10 @@ bool IsAssemblySource(const protocol::Source &source) { } bool DisplayAssemblySource(lldb::SBDebugger &debugger, - lldb::SBAddress address) { + lldb::SBLineEntry line_entry) { const lldb::StopDisassemblyType stop_disassembly_display = GetStopDisassemblyDisplay(debugger); - return ShouldDisplayAssemblySource(address, stop_disassembly_display); + return ShouldDisplayAssemblySource(line_entry, stop_disassembly_display); } std::string GetLoadAddressString(const lldb::addr_t addr) { diff --git a/lldb/tools/lldb-dap/ProtocolUtils.h b/lldb/tools/lldb-dap/ProtocolUtils.h index a1f7ae0661914..f4d576ba9f608 100644 --- a/lldb/tools/lldb-dap/ProtocolUtils.h +++ b/lldb/tools/lldb-dap/ProtocolUtils.h @@ -53,7 +53,8 @@ std::optional CreateSource(const lldb::SBFileSpec &file); /// Checks if the given source is for assembly code. bool IsAssemblySource(const protocol::Source &source); -bool DisplayAssemblySource(lldb::SBDebugger &debugger, lldb::SBAddress address); +bool DisplayAssemblySource(lldb::SBDebugger &debugger, + lldb::SBLineEntry line_entry); /// Get the address as a 16-digit hex string, e.g. "0x0000000000012345" std::string GetLoadAddressString(const lldb::addr_t addr); From 79d9ae7a777a03452991d222642ffdb6687d9210 Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Tue, 11 Nov 2025 16:37:42 -0800 Subject: [PATCH 11/32] [AMDGPU][GISel] Add RegBankLegalize support for G_AMDGPU_WAVE_ADDRESS (#167456) --- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 2 ++ .../GlobalISel/regbankselect-amdgpu-wave-address.mir | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 615b911a22903..90114e44f1a48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -931,6 +931,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniP4}, {{SgprP4}, {}}}) .Any({{UniP8}, {{SgprP8}, {}}}); + addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}}); + bool hasSALUFloat = ST->hasSALUFloatInsts(); addRulesForGOpcs({G_FADD}, Standard) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir index f372c1f81948f..59716a250ff59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -regbankselect-greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -regbankselect-fast -o - %s | FileCheck %s # TODO: We could use scalar --- @@ -25,8 +25,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS 
$sgpr32 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY [[AMDGPU_WAVE_ADDRESS]](p5) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p1) :: (store (p5), addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](p5), [[DEF]](p1) :: (store (p5), addrspace 1) %0:_(p1) = G_IMPLICIT_DEF %1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %1, %0 :: (store (p5), addrspace 1) From ea10026b64f66b3b69c0545db20f9daa8579f5cb Mon Sep 17 00:00:00 2001 From: hev Date: Wed, 12 Nov 2025 08:51:08 +0800 Subject: [PATCH 12/32] Reland "[LoongArch] Add `isSafeToMove` hook to prevent unsafe instruction motion" (#167465) This patch introduces a new virtual method `TargetInstrInfo::isSafeToMove()` to allow backends to control whether a machine instruction can be safely moved by optimization passes. The `BranchFolder` pass now respects this hook when hoisting common code. By default, all instructions are considered safe to to move. For LoongArch, `isSafeToMove()` is overridden to prevent relocation-related instruction sequences (e.g. PC-relative addressing and calls) from being broken by instruction motion. Correspondingly, `isSchedulingBoundary()` is updated to reuse this logic for consistency. Relands #163725 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 11 ++++ llvm/lib/CodeGen/BranchFolding.cpp | 5 ++ .../Target/LoongArch/LoongArchInstrInfo.cpp | 45 +++++++++------ .../lib/Target/LoongArch/LoongArchInstrInfo.h | 3 + llvm/test/CodeGen/LoongArch/issue163681.ll | 56 +++++++++++++++++++ 5 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/issue163681.ll diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 43f28ed79f9dd..18142c2c0adf3 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1765,6 +1765,17 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { return true; } + /// Return true if it's safe to move a machine instruction. + /// This allows the backend to prevent certain special instruction + /// sequences from being broken by instruction motion in optimization + /// passes. + /// By default, this returns true for every instruction. + virtual bool isSafeToMove(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + return true; + } + /// Test if the given instruction should be considered a scheduling boundary. /// This primarily includes labels and terminators. virtual bool isSchedulingBoundary(const MachineInstr &MI, diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 7292bc2be0df2..0b212fb0beb20 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1979,6 +1979,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MachineBasicBlock::iterator FIB = FBB->begin(); MachineBasicBlock::iterator TIE = TBB->end(); MachineBasicBlock::iterator FIE = FBB->end(); + MachineFunction &MF = *TBB->getParent(); while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. TIB = skipDebugInstructionsForward(TIB, TIE, false); @@ -1993,6 +1994,10 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { // Hard to reason about register liveness with predicated instruction. break; + if (!TII->isSafeToMove(*TIB, TBB, MF)) + // Don't hoist the instruction if it isn't safe to move. 
+ break; + bool IsSafe = true; for (MachineOperand &MO : TIB->operands()) { // Don't attempt to hoist instructions with register masks. diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 9a33dccd002c7..9fc862af7ea24 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -378,12 +378,9 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } } -bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const { - if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) - return true; - +bool LoongArchInstrInfo::isSafeToMove(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { auto MII = MI.getIterator(); auto MIE = MBB->end(); @@ -429,25 +426,25 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO2 = Lu32I->getOperand(2).getTargetFlags(); if (MO0 == LoongArchII::MO_PCREL_HI && MO1 == LoongArchII::MO_PCREL_LO && MO2 == LoongArchII::MO_PCREL64_LO) - return true; + return false; if ((MO0 == LoongArchII::MO_GOT_PC_HI || MO0 == LoongArchII::MO_LD_PC_HI || MO0 == LoongArchII::MO_GD_PC_HI) && MO1 == LoongArchII::MO_GOT_PC_LO && MO2 == LoongArchII::MO_GOT_PC64_LO) - return true; + return false; if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO && MO2 == LoongArchII::MO_IE_PC64_LO) - return true; + return false; if (MO0 == LoongArchII::MO_DESC_PC_HI && MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC64_PC_LO) - return true; + return false; break; } case LoongArch::LU52I_D: { auto MO = MI.getOperand(2).getTargetFlags(); if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI || MO == LoongArchII::MO_IE_PC64_HI || MO == LoongArchII::MO_DESC64_PC_HI) - return true; + return false; break; } default: @@ -487,7 +484,7 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2)); auto MO2 = LoongArchII::getDirectFlags(Ld->getOperand(2)); if (MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC_LD) - return true; + return false; break; } if (SecondOp == MIE || @@ -496,34 +493,34 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2)); if (MO0 == LoongArchII::MO_PCREL_HI && SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_PCREL_LO) - return true; + return false; if (MO0 == LoongArchII::MO_GOT_PC_HI && SecondOp->getOpcode() == LdOp && MO1 == LoongArchII::MO_GOT_PC_LO) - return true; + return false; if ((MO0 == LoongArchII::MO_LD_PC_HI || MO0 == LoongArchII::MO_GD_PC_HI) && SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::ADDI_W: case LoongArch::ADDI_D: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_PCREL_LO || MO == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::LD_W: case LoongArch::LD_D: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::PseudoDESC_CALL: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_DESC_CALL) - return true; + return false; break; } default: @@ -531,6 +528,18 @@ bool 
LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, } } + return true; +} + +bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) + return true; + + if (!isSafeToMove(MI, MBB, MF)) + return true; + return false; } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index 796ef9f3a5715..9f7a0a2239a87 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -66,6 +66,9 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; + bool isSafeToMove(const MachineInstr &MI, const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; diff --git a/llvm/test/CodeGen/LoongArch/issue163681.ll b/llvm/test/CodeGen/LoongArch/issue163681.ll new file mode 100644 index 0000000000000..f6df349253045 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/issue163681.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch64 -code-model=large --verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +@.str = external constant [1 x i8] + +define void @caller(ptr %0) { +; CHECK-LABEL: caller: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ld.w $a2, $zero, 0 +; CHECK-NEXT: ld.d $a1, $a0, 0 +; CHECK-NEXT: beqz $a2, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str) +; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str) +; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str) +; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str) +; CHECK-NEXT: ldx.d $a2, $a2, $a0 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: jirl $ra, $zero, 0 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str) +; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str) +; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str) +; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str) +; CHECK-NEXT: ldx.d $a2, $a2, $a0 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: move $a3, $zero +; CHECK-NEXT: jirl $ra, $zero, 0 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: st.d $zero, $zero, 0 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %2 = load i32, ptr null, align 4 + %3 = icmp eq i32 %2, 0 + %4 = load i64, ptr %0, align 8 + br i1 %3, label %6, label %5 + +5: ; preds = %1 + call void null(ptr null, i64 %4, ptr @.str) + br label %7 + +6: ; preds = %1 + tail call void null(ptr null, i64 %4, ptr @.str, i32 0) + br label %7 + +7: ; preds = %6, %5 + store ptr null, ptr null, align 8 + ret void +} From 6655681cd0554f8df91bb0f7631b882f5bb13b81 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 11 Nov 2025 19:44:20 -0600 Subject: [PATCH 13/32] [llvm-offload-wrapper] Fix Triple and OpenMP handling (#167580) Summary: The OpenMP handling using an offload binary should be optional, it's only used for extra metadata for llvm-objdump. Also the triple was completely wrong, it didn't let anyone correctly choose between ELF and COFF handling. 
--- .../Frontend/Offloading/OffloadWrapper.cpp | 28 ++++--- llvm/test/Other/offload-wrapper.ll | 52 ------------ .../llvm-offload-wrapper/offload-wrapper.ll | 81 +++++++++++++++++++ .../llvm-offload-wrapper.cpp | 2 +- 4 files changed, 99 insertions(+), 64 deletions(-) delete mode 100644 llvm/test/Other/offload-wrapper.ll create mode 100644 llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp index 45818deda8aa6..86060d1d2b0b3 100644 --- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp @@ -147,21 +147,27 @@ GlobalVariable *createBinDesc(Module &M, ArrayRef> Bufs, Image->setAlignment(Align(object::OffloadBinary::getAlignment())); StringRef Binary(Buf.data(), Buf.size()); - assert(identify_magic(Binary) == file_magic::offload_binary && - "Invalid binary format"); + uint64_t BeginOffset = 0; + uint64_t EndOffset = Binary.size(); + + // Optionally use an offload binary for its offload dumping support. // The device image struct contains the pointer to the beginning and end of // the image stored inside of the offload binary. There should only be one // of these for each buffer so we parse it out manually. - const auto *Header = - reinterpret_cast( - Binary.bytes_begin()); - const auto *Entry = reinterpret_cast( - Binary.bytes_begin() + Header->EntryOffset); - - auto *Begin = ConstantInt::get(getSizeTTy(M), Entry->ImageOffset); - auto *Size = - ConstantInt::get(getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize); + if (identify_magic(Binary) == file_magic::offload_binary) { + const auto *Header = + reinterpret_cast( + Binary.bytes_begin()); + const auto *Entry = + reinterpret_cast( + Binary.bytes_begin() + Header->EntryOffset); + BeginOffset = Entry->ImageOffset; + EndOffset = Entry->ImageOffset + Entry->ImageSize; + } + + auto *Begin = ConstantInt::get(getSizeTTy(M), BeginOffset); + auto *Size = ConstantInt::get(getSizeTTy(M), EndOffset); Constant *ZeroBegin[] = {Zero, Begin}; Constant *ZeroSize[] = {Zero, Size}; diff --git a/llvm/test/Other/offload-wrapper.ll b/llvm/test/Other/offload-wrapper.ll deleted file mode 100644 index 9107a141ad201..0000000000000 --- a/llvm/test/Other/offload-wrapper.ll +++ /dev/null @@ -1,52 +0,0 @@ -; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc -; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP - -; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA" -; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ" -; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin" -; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 -; HIP-NEXT: @.hip.binary_handle = internal global ptr null -; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] - -; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { -; HIP-NEXT: entry: -; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) -; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 -; HIP-NEXT: call void @.hip.globals_reg(ptr %0) -; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg) -; HIP-NEXT: ret void -; HIP-NEXT: } - -; HIP: 
define internal void @.hip.fatbin_unreg() section ".text.startup" { -; HIP-NEXT: entry: -; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 -; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0) -; HIP-NEXT: ret void -; HIP-NEXT: } - -; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc -; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA - -; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA" -; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ" -; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin" -; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 -; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null -; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }] - -; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" { -; CUDA-NEXT: entry: -; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper) -; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8 -; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0) -; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0) -; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg) -; CUDA-NEXT: ret void -; CUDA-NEXT: } - -; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" { -; CUDA-NEXT: entry: -; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8 -; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0) -; CUDA-NEXT: ret void -; CUDA-NEXT: } diff --git a/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll new file mode 100644 index 0000000000000..32aad0b6cf64e --- /dev/null +++ b/llvm/test/tools/llvm-offload-wrapper/offload-wrapper.ll @@ -0,0 +1,81 @@ +; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=openmp %s -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=OMP + +; OMP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; OMP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; OMP-NEXT: @__dummy.llvm_offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries", align 8 +; OMP-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.llvm_offload_entries], section "llvm.metadata" +; OMP-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"{{.*}}", section ".llvm.offloading", align 8 +; OMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr @.omp_offloading.device_image, ptr getelementptr ([[[SIZE]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 [[SIZE]]), ptr @__start_llvm_offload_entries, ptr @__stop_llvm_offload_entries }] +; OMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_llvm_offload_entries, ptr @__stop_llvm_offload_entries } +; OMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }] + +; OMP: define internal 
void @.omp_offloading.descriptor_reg() section ".text.startup" { +; OMP-NEXT: entry: +; OMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor) +; OMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg) +; OMP-NEXT: ret void +; OMP-NEXT: } + +; OMP: define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" { +; OMP-NEXT: entry: +; OMP-NEXT: call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor) +; OMP-NEXT: ret void +; OMP-NEXT: } + +; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=hip %s -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP + +; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; HIP-NEXT: @__dummy.llvm_offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries", align 8 +; HIP-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.llvm_offload_entries], section "llvm.metadata" +; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin" +; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 +; HIP-NEXT: @.hip.binary_handle = internal global ptr null +; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] + +; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { +; HIP-NEXT: entry: +; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) +; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 +; HIP-NEXT: call void @.hip.globals_reg(ptr %0) +; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg) +; HIP-NEXT: ret void +; HIP-NEXT: } + +; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" { +; HIP-NEXT: entry: +; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 +; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0) +; HIP-NEXT: ret void +; HIP-NEXT: } + +; RUN: llvm-offload-wrapper --triple=x86_64-unknown-linux-gnu -kind=cuda %s -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA + +; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry] +; CUDA-NEXT: @__dummy.llvm_offload_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "llvm_offload_entries", align 8 +; CUDA-NEXT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.llvm_offload_entries], section "llvm.metadata" +; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin" +; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 +; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null +; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }] + +; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" { +; CUDA-NEXT: entry: +; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper) +; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8 +; CUDA-NEXT: call void @.cuda.globals_reg(ptr 
%0) +; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0) +; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg) +; CUDA-NEXT: ret void +; CUDA-NEXT: } + +; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" { +; CUDA-NEXT: entry: +; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8 +; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0) +; CUDA-NEXT: ret void +; CUDA-NEXT: } diff --git a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp index d65b402571ae8..cda59b6f49b62 100644 --- a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp +++ b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp @@ -64,7 +64,7 @@ static Error wrapImages(ArrayRef> BuffersToWrap) { LLVMContext Context; Module M("offload.wrapper.module", Context); - M.setTargetTriple(Triple()); + M.setTargetTriple(llvm::Triple(TheTriple)); switch (Kind) { case llvm::object::OFK_OpenMP: From a863fd879049b74a32f7657b1b8f2b1faca447ff Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 11 Nov 2025 17:45:37 -0800 Subject: [PATCH 14/32] [NFC] Generalize the arithmetic type for `getDisjunctionWeights` (#167593) --- llvm/include/llvm/IR/ProfDataUtils.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index a7bcbf010d1bf..f1c2f38c74afd 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -18,6 +18,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Compiler.h" +#include +#include namespace llvm { struct MDProfLabels { @@ -216,9 +218,13 @@ LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); /// branch weights B1 and B2, respectively. In both B1 and B2, the first /// position (index 0) is for the 'true' branch, and the second position (index /// 1) is for the 'false' branch. +template && std::is_arithmetic_v && + sizeof(T1) <= sizeof(uint64_t) && sizeof(T2) <= sizeof(uint64_t)>> inline SmallVector -getDisjunctionWeights(const SmallVector &B1, - const SmallVector &B2) { +getDisjunctionWeights(const SmallVector &B1, + const SmallVector &B2) { // For the first conditional branch, the probability the "true" case is taken // is p(b1) = B1[0] / (B1[0] + B1[1]). The "false" case's probability is // p(not b1) = B1[1] / (B1[0] + B1[1]). @@ -235,8 +241,8 @@ getDisjunctionWeights(const SmallVector &B1, // the product of sums, the subtracted one cancels out). 
assert(B1.size() == 2); assert(B2.size() == 2); - auto FalseWeight = B1[1] * B2[1]; - auto TrueWeight = B1[0] * B2[0] + B1[0] * B2[1] + B1[1] * B2[0]; + uint64_t FalseWeight = B1[1] * B2[1]; + uint64_t TrueWeight = B1[0] * (B2[0] + B2[1]) + B1[1] * B2[0]; return {TrueWeight, FalseWeight}; } } // namespace llvm From 75e38aa6496e655d692de86fb549925d6476e11b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:01:15 -0800 Subject: [PATCH 15/32] RuntimeLibcalls: Add libcall entries for sleef and armpl modf functions (#166985) --- llvm/include/llvm/IR/RuntimeLibcalls.td | 11 ++++ llvm/lib/IR/RuntimeLibcalls.cpp | 56 ++++++++++++++++++- .../Util/DeclareRuntimeLibcalls/armpl.ll | 8 +++ .../Util/DeclareRuntimeLibcalls/sleef.ll | 10 +++- 4 files changed, 82 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index ee80606ed0dbf..ce7e836f66446 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -183,6 +183,7 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in { } foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in { + def MODF_#VecTy : RuntimeLibcall; def SINCOS_#VecTy : RuntimeLibcall; def SINCOSPI_#VecTy : RuntimeLibcall; } @@ -1093,6 +1094,11 @@ def __security_check_cookie_arm64ec : RuntimeLibcallImpl SleefLibcalls = { + def _ZGVnN2vl8_modf : RuntimeLibcallImpl; + def _ZGVnN4vl4_modff : RuntimeLibcallImpl; + def _ZGVsNxvl8_modf : RuntimeLibcallImpl; + def _ZGVsNxvl4_modff : RuntimeLibcallImpl; + def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl; def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl; def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl; @@ -1109,6 +1115,11 @@ defset list SleefLibcalls = { //===----------------------------------------------------------------------===// defset list ARMPLLibcalls = { + def armpl_vmodfq_f64 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_vmodfq_f32 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_svmodf_f64_x : RuntimeLibcallImpl; + def armpl_svmodf_f32_x : RuntimeLibcallImpl; + def armpl_vsincosq_f64 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall def armpl_vsincosq_f32 diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index e66b9adb43ac4..ee23b58742b64 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -43,7 +43,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, switch (ClVectorLibrary) { case VectorLibrary::SLEEFGNUABI: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, + {RTLIB::impl__ZGVnN2vl8_modf, RTLIB::impl__ZGVnN4vl4_modff, + RTLIB::impl__ZGVsNxvl8_modf, RTLIB::impl__ZGVsNxvl4_modff, + RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf, RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, RTLIB::impl__ZGVsNxvl4l4_sincospif, @@ -52,7 +54,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, break; case VectorLibrary::ArmPL: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, + {RTLIB::impl_armpl_vmodfq_f64, RTLIB::impl_armpl_vmodfq_f32, + RTLIB::impl_armpl_svmodf_f64_x, RTLIB::impl_armpl_svmodf_f32_x, + RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x, RTLIB::impl_armpl_vsincospiq_f32, 
RTLIB::impl_armpl_vsincospiq_f64, RTLIB::impl_armpl_svsincospi_f32_x, @@ -197,6 +201,52 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, fcNegNormal)); return {FuncTy, Attrs}; } + case RTLIB::impl__ZGVnN2vl8_modf: + case RTLIB::impl__ZGVnN4vl4_modff: + case RTLIB::impl__ZGVsNxvl8_modf: + case RTLIB::impl__ZGVsNxvl4_modff: + case RTLIB::impl_armpl_vmodfq_f64: + case RTLIB::impl_armpl_vmodfq_f32: + case RTLIB::impl_armpl_svmodf_f64_x: + case RTLIB::impl_armpl_svmodf_f32_x: { + AttrBuilder FuncAttrBuilder(Ctx); + + bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4_modff || + LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff || + LibcallImpl == RTLIB::impl_armpl_vmodfq_f32 || + LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x; + + bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl8_modf || + LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff || + LibcallImpl == RTLIB::impl_armpl_svmodf_f64_x || + LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x; + + Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + unsigned EC = IsF32 ? 4 : 2; + VectorType *VecTy = VectorType::get(ScalarTy, EC, IsScalable); + + for (Attribute::AttrKind Attr : CommonFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod)); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + { + AttrBuilder ArgAttrBuilder(Ctx); + for (Attribute::AttrKind AK : CommonPtrArgAttrs) + ArgAttrBuilder.addAttribute(AK); + ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy)); + Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder); + } + + PointerType *PtrTy = PointerType::get(Ctx, 0); + SmallVector ArgTys = {VecTy, PtrTy}; + if (hasVectorMaskArgument(LibcallImpl)) + ArgTys.push_back(VectorType::get(Type::getInt1Ty(Ctx), EC, IsScalable)); + + return {FunctionType::get(VecTy, ArgTys, false), Attrs}; + } case RTLIB::impl__ZGVnN2vl8l8_sincos: case RTLIB::impl__ZGVnN4vl4l4_sincosf: case RTLIB::impl__ZGVsNxvl8l8_sincos: @@ -271,6 +321,8 @@ bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) { /// FIXME: This should be generated by tablegen and support the argument at an /// arbitrary position switch (Impl) { + case RTLIB::impl_armpl_svmodf_f64_x: + case RTLIB::impl_armpl_svmodf_f32_x: case RTLIB::impl_armpl_svsincos_f32_x: case RTLIB::impl_armpl_svsincos_f64_x: case RTLIB::impl_armpl_svsincospi_f32_x: diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll index 1d9cf6a5d77fe..e79e89c95c14a 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -1,6 +1,10 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s +; CHECK: declare @armpl_svmodf_f32_x(, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] + +; CHECK: declare @armpl_svmodf_f64_x(, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] + ; CHECK: declare void @armpl_svsincos_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] ; CHECK: declare void @armpl_svsincos_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] @@ -9,6 +13,10 @@ ; CHECK: declare void @armpl_svsincospi_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 
16, ) [[ATTRS]] +; CHECK: declare <4 x float> @armpl_vmodfq_f32(<4 x float>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare <2 x double> @armpl_vmodfq_f64(<2 x double>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll index 2c6900761b1c0..ef2481111087f 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -1,18 +1,26 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare <2 x double> @_ZGVnN2vl8_modf(<2 x double>, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare <4 x float> @_ZGVnN4vl4_modff(<4 x float>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVnN4vl4l4_sincosf(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare @_ZGVsNxvl4_modff(, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVsNxvl4l4_sincosf(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVsNxvl4l4_sincospif(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare @_ZGVsNxvl8_modf(, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVsNxvl8l8_sincos(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] From 4b9771e41a2a53a0398f53bc90d8d0587f03e162 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:05:35 -0800 Subject: [PATCH 16/32] DAG: Use modf vector libcalls through RuntimeLibcalls (#166986) Copy new process from sincos/sincospi --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 41 ++----------------- .../SelectionDAG/LegalizeVectorOps.cpp | 5 ++- llvm/lib/CodeGen/TargetLoweringBase.cpp | 18 ++++++++ 3 files changed, 24 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a52ad41d0f1b3..944e1714e8f98 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -313,33 +313,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Type 
*Ty = getContainedTypes(RetTy).front(); EVT VT = getTLI()->getValueType(DL, Ty); - EVT ScalarVT = VT.getScalarType(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - /// Migration flag. IsVectorCall cases directly know about the vector - /// libcall in RuntimeLibcallsInfo and shouldn't try to use - /// LibInfo->getVectorMappingInfo. - bool IsVectorCall = false; - switch (ICA.getID()) { case Intrinsic::modf: - LC = RTLIB::getMODF(ScalarVT); + LC = RTLIB::getMODF(VT); break; case Intrinsic::sincospi: LC = RTLIB::getSINCOSPI(VT); - if (LC == RTLIB::UNKNOWN_LIBCALL) - LC = RTLIB::getSINCOSPI(ScalarVT); - else if (VT.isVector()) - IsVectorCall = true; - break; case Intrinsic::sincos: LC = RTLIB::getSINCOS(VT); - if (LC == RTLIB::UNKNOWN_LIBCALL) - LC = RTLIB::getSINCOS(ScalarVT); - else if (VT.isVector()) - IsVectorCall = true; - break; default: return std::nullopt; @@ -350,33 +334,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (LibcallImpl == RTLIB::Unsupported) return std::nullopt; - StringRef LCName = - RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl); - - // Search for a corresponding vector variant. - // - // FIXME: Should use RuntimeLibcallsInfo, not TargetLibraryInfo to get the - // vector mapping. LLVMContext &Ctx = RetTy->getContext(); - ElementCount VF = getVectorizedTypeVF(RetTy); - VecDesc const *VD = nullptr; - - if (!IsVectorCall) { - for (bool Masked : {false, true}) { - if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked))) - break; - } - if (!VD) - return std::nullopt; - } // Cost the call + mask. auto Cost = thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind); - if ((VD && VD->isMasked()) || - (IsVectorCall && - RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl))) { + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + ElementCount VF = getVectorizedTypeVF(RetTy); auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF); Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, VecTy, {}, CostKind, 0, nullptr, {}); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 78d8ea0676dd7..a7ae794459331 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1283,9 +1283,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { break; } case ISD::FMODF: { - EVT VT = Node->getValueType(0).getVectorElementType(); + EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, + if (LC != RTLIB::UNKNOWN_LIBCALL && + DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 36a424f1c8b63..77d9b156e2672 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -475,6 +475,24 @@ RTLIB::Libcall RTLIB::getSINCOS_STRET(EVT RetVT) { } RTLIB::Libcall RTLIB::getMODF(EVT RetVT) { + // TODO: Tablegen should generate this function + if (RetVT.isVector()) { + if (!RetVT.isSimple()) + return RTLIB::UNKNOWN_LIBCALL; + switch (RetVT.getSimpleVT().SimpleTy) { + case MVT::v4f32: + return RTLIB::MODF_V4F32; + case MVT::v2f64: + return RTLIB::MODF_V2F64; + case MVT::nxv4f32: + return RTLIB::MODF_NXV4F32; + case MVT::nxv2f64: + return RTLIB::MODF_NXV2F64; + default: + return 
RTLIB::UNKNOWN_LIBCALL; + } + } + return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128, MODF_PPCF128); } From c1f18a2518898e5f593ce1341d19f61b5ee58cdd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:26:24 -0800 Subject: [PATCH 17/32] InferAddressSpaces: Add more baseline tests for assume handling (#167611) --- .../AMDGPU/builtin-assumed-addrspace.ll | 208 +++++++++++++++++- 1 file changed, 200 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll index e0c80c0389541..32dca860a7ded 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces -o - %s | FileCheck %s -define float @f0(ptr %p) { -; CHECK-LABEL: define float @f0( +define float @assume_is_shared_gep(ptr %p) { +; CHECK-LABEL: define float @assume_is_shared_gep( ; CHECK-SAME: ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) @@ -24,8 +24,8 @@ entry: ret float %load } -define float @f1(ptr %p) { -; CHECK-LABEL: define float @f1( +define float @assume_is_private_gep(ptr %p) { +; CHECK-LABEL: define float @assume_is_private_gep( ; CHECK-SAME: ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) @@ -47,8 +47,8 @@ entry: ret float %load } -define float @f2(ptr %p) { -; CHECK-LABEL: define float @f2( +define float @assume_not_private_and_not_shared_gep(ptr %p) { +; CHECK-LABEL: define float @assume_not_private_and_not_shared_gep( ; CHECK-SAME: ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) @@ -78,8 +78,8 @@ entry: ret float %load } -define float @g0(i32 %c, ptr %p) { -; CHECK-LABEL: define float @g0( +define float @conditionally_assume_is_shared_gep(i32 %c, ptr %p) { +; CHECK-LABEL: define float @conditionally_assume_is_shared_gep( ; CHECK-SAME: i32 [[C:%.*]], ptr [[P:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[C]], 0 @@ -127,6 +127,198 @@ if.end: ret float %add2 } +define float @conditionally_assume_is_shared_else_assume_private(i32 %c, ptr %p) { +; CHECK-LABEL: define float @conditionally_assume_is_shared_else_assume_private( +; CHECK-SAME: i32 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_THEN_SHARED:.*]], label %[[IF_THEN_PRIVATE:.*]] +; CHECK: [[IF_THEN_SHARED]]: +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: [[WORKITEM_ID_X_0:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[WORKITEM_ID_X_0]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(3) +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP0]], i64 [[IDXPROM]] +; CHECK-NEXT: [[LOAD0:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX0]], align 4 +; CHECK-NEXT: [[ADD0:%.*]] = fadd float [[LOAD0]], 4.000000e+00 +; CHECK-NEXT: br label 
%[[IF_END:.*]] +; CHECK: [[IF_THEN_PRIVATE]]: +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_PRIVATE]]) +; CHECK-NEXT: [[WORKITEM_ID_X_1:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[WORKITEM_ID_X_1]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(5) +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr addrspace(5) [[TMP1]], i64 [[IDXPROM1]] +; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr addrspace(5) [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOAD1]], 4.000000e+00 +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[ADD0]], %[[IF_THEN_SHARED]] ], [ [[ADD1]], %[[IF_THEN_PRIVATE]] ] +; CHECK-NEXT: ret float [[PHI]] +; +entry: + %tobool.not = icmp eq i32 %c, 0 + br i1 %tobool.not, label %if.then.shared, label %if.then.private + +if.then.shared: + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p) + tail call void @llvm.assume(i1 %is.shared) + %workitem.id.x.0 = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom = zext i32 %workitem.id.x.0 to i64 + %arrayidx0 = getelementptr inbounds float, ptr %p, i64 %idxprom + %load0 = load float, ptr %arrayidx0, align 4 + %add0 = fadd float %load0, 4.0 + br label %if.end + +if.then.private: + %is.private = call i1 @llvm.amdgcn.is.private(ptr %p) + tail call void @llvm.assume(i1 %is.private) + %workitem.id.x.1 = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom1 = zext i32 %workitem.id.x.1 to i64 + %arrayidx1 = getelementptr inbounds float, ptr %p, i64 %idxprom1 + %load1 = load float, ptr %arrayidx1, align 4 + %add1 = fadd float %load1, 4.0 + br label %if.end + +if.end: + %phi = phi float [ %add0, %if.then.shared ], [ %add1, %if.then.private ] + ret float %phi +} + +define float @assume_func_arg_is_shared_load(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_shared_load( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[FLAT_PTR]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %flat.ptr) + tail call void @llvm.assume(i1 %is.shared) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define float @assume_func_arg_is_private_load(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_private_load( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_PRIVATE]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + tail call void @llvm.assume(i1 %is.private) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define float @assume_func_arg_is_not_shared_not_private(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_not_shared_not_private( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_PRIVATE:%.*]] = xor i1 [[IS_PRIVATE]], true +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_SHARED:%.*]] = xor i1 [[IS_SHARED]], true +; 
CHECK-NEXT: [[NOT_PRIVATE_AND_NOT_SHARED:%.*]] = and i1 [[NOT_PRIVATE]], [[NOT_SHARED]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[NOT_PRIVATE_AND_NOT_SHARED]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + %not.private = xor i1 %is.private, true + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %flat.ptr) + %not.shared = xor i1 %is.shared, true + %not.private.and.not.shared = and i1 %not.private, %not.shared + tail call void @llvm.assume(i1 %not.private.and.not.shared) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define float @assume_func_arg_is_not_private_load(ptr %flat.ptr) { +; CHECK-LABEL: define float @assume_func_arg_is_not_private_load( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_IS_PRIVATE:%.*]] = xor i1 [[IS_PRIVATE]], true +; CHECK-NEXT: tail call void @llvm.assume(i1 [[NOT_IS_PRIVATE]]) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[FLAT_PTR]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + %not.is.private = xor i1 %is.private, true + tail call void @llvm.assume(i1 %not.is.private) + %load = load float, ptr %flat.ptr, align 4 + ret float %load +} + +define i64 @assume_func_arg_is_not_private_atomicrmw(ptr %flat.ptr, i64 %val) { +; CHECK-LABEL: define i64 @assume_func_arg_is_not_private_atomicrmw( +; CHECK-SAME: ptr [[FLAT_PTR:%.*]], i64 [[VAL:%.*]]) { +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[FLAT_PTR]]) +; CHECK-NEXT: [[NOT_IS_PRIVATE:%.*]] = xor i1 [[IS_PRIVATE]], true +; CHECK-NEXT: tail call void @llvm.assume(i1 [[NOT_IS_PRIVATE]]) +; CHECK-NEXT: [[RMW:%.*]] = atomicrmw sub ptr [[FLAT_PTR]], i64 [[VAL]] seq_cst, align 4 +; CHECK-NEXT: ret i64 [[RMW]] +; + %is.private = call i1 @llvm.amdgcn.is.private(ptr %flat.ptr) + %not.is.private = xor i1 %is.private, true + tail call void @llvm.assume(i1 %not.is.private) + %rmw = atomicrmw sub ptr %flat.ptr, i64 %val seq_cst, align 4 + ret i64 %rmw +} + +define float @contradictory_assume_after_gep_same_block(ptr %p) { +; CHECK-LABEL: define float @contradictory_assume_after_gep_same_block( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: [[WORKITEM_ID_X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[WORKITEM_ID_X]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(3) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[IDXPROM]] +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP2]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 false) +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) [[GEP]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p) + tail call void @llvm.assume(i1 %is.shared) + %workitem.id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom = zext i32 %workitem.id.x to i64 + %gep = getelementptr inbounds float, ptr %p, i64 %idxprom + %is.private = call i1 @llvm.amdgcn.is.private(ptr %gep) + tail call void @llvm.assume(i1 %is.private) + %load = load float, ptr 
%gep, align 4 + ret float %load +} + +define float @contradictory_assume_argument_same_block(ptr %p) { +; CHECK-LABEL: define float @contradictory_assume_argument_same_block( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[P]]) +; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[P]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_SHARED]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[IS_PRIVATE]]) +; CHECK-NEXT: [[WORKITEM_ID_X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[WORKITEM_ID_X]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(3) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i64 [[IDXPROM]] +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) [[GEP]], align 4 +; CHECK-NEXT: ret float [[LOAD]] +; + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p) + %is.private = call i1 @llvm.amdgcn.is.private(ptr %p) + tail call void @llvm.assume(i1 %is.shared) + tail call void @llvm.assume(i1 %is.private) + %workitem.id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %idxprom = zext i32 %workitem.id.x to i64 + %gep = getelementptr inbounds float, ptr %p, i64 %idxprom + %load = load float, ptr %gep, align 4 + ret float %load +} + declare void @llvm.assume(i1) declare i1 @llvm.amdgcn.is.shared(ptr nocapture) declare i1 @llvm.amdgcn.is.private(ptr nocapture) From a100a6c97be0259c8d0b5758830cba14d4eb5c4d Mon Sep 17 00:00:00 2001 From: Timur Baydyusenov Date: Wed, 12 Nov 2025 05:29:40 +0300 Subject: [PATCH 18/32] [llvm][llvm-dis] Fix 'llvm-dis' with '--materialize-metadata --show-annotations' crashes (#167487) Added handling the case of a non-materialized module, also don't call printInfoComment for immaterializable values --- llvm/lib/IR/AsmWriter.cpp | 13 ++++++------ llvm/test/Assembler/metadata-annotations.ll | 22 +++++++++++++++++---- llvm/tools/llvm-dis/llvm-dis.cpp | 13 ++++++++++++ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 0c8565c927a24..4d4ffe93a8067 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2931,7 +2931,7 @@ class AssemblyWriter { // printInfoComment - Print a little comment after the instruction indicating // which slot it occupies. - void printInfoComment(const Value &V); + void printInfoComment(const Value &V, bool isMaterializable = false); // printGCRelocateComment - print comment after call to the gc.relocate // intrinsic indicating base and derived pointer names. @@ -3963,7 +3963,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (Attrs.hasAttributes()) Out << " #" << Machine.getAttributeGroupSlot(Attrs); - printInfoComment(*GV); + printInfoComment(*GV, GV->isMaterializable()); } void AssemblyWriter::printAlias(const GlobalAlias *GA) { @@ -4001,7 +4001,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { Out << '"'; } - printInfoComment(*GA); + printInfoComment(*GA, GA->isMaterializable()); Out << '\n'; } @@ -4040,7 +4040,7 @@ void AssemblyWriter::printIFunc(const GlobalIFunc *GI) { printMetadataAttachments(MDs, ", "); } - printInfoComment(*GI); + printInfoComment(*GI, GI->isMaterializable()); Out << '\n'; } @@ -4319,13 +4319,12 @@ void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) { /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. 
-void AssemblyWriter::printInfoComment(const Value &V) { +void AssemblyWriter::printInfoComment(const Value &V, bool isMaterializable) { if (const auto *Relocate = dyn_cast(&V)) printGCRelocateComment(*Relocate); - if (AnnotationWriter) { + if (AnnotationWriter && !isMaterializable) AnnotationWriter->printInfoComment(V, Out); - } if (PrintInstDebugLocs) { if (auto *I = dyn_cast(&V)) { diff --git a/llvm/test/Assembler/metadata-annotations.ll b/llvm/test/Assembler/metadata-annotations.ll index 4fd471338cd0a..2a08a17849dbd 100644 --- a/llvm/test/Assembler/metadata-annotations.ll +++ b/llvm/test/Assembler/metadata-annotations.ll @@ -1,9 +1,23 @@ ; RUN: llvm-as < %s | llvm-dis --materialize-metadata --show-annotations | FileCheck %s +; CHECK: @global_var = global i32 1 +; CHECK: @alias = alias i32, ptr @global_var +; CHECK: @ifunc = ifunc i32 (), ptr @ifunc_resolver +@global_var = global i32 1 +@alias = alias i32, ptr @global_var +@ifunc = ifunc i32 (), ptr @ifunc_resolver + +; CHECK: ; Materializable +; CHECK-NEXT: define ptr @ifunc_resolver() {} +define ptr @ifunc_resolver() { + ret ptr @defined_function +} + ; CHECK: ; Materializable -; CHECK-NEXT: define dso_local i32 @test() {} -define dso_local i32 @test() { -entry: - ret i32 0 +; CHECK-NEXT: define void @defined_function() {} +define void @defined_function() { + ret void } +; CHECK: declare void @declared_function() +declare void @declared_function() diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index 35c540963a487..90ae3ef077ae9 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -101,13 +101,26 @@ static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { } } class CommentWriter : public AssemblyAnnotationWriter { +private: + bool canSafelyAccessUses(const Value &V) { + // Can't safely access uses, if module not materialized. + const GlobalValue *GV = dyn_cast(&V); + return !GV || (GV->getParent() && GV->getParent()->isMaterialized()); + } + public: void emitFunctionAnnot(const Function *F, formatted_raw_ostream &OS) override { + if (!canSafelyAccessUses(*F)) + return; + OS << "; [#uses=" << F->getNumUses() << ']'; // Output # uses OS << '\n'; } void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { + if (!canSafelyAccessUses(V)) + return; + bool Padded = false; if (!V.getType()->isVoidTy()) { OS.PadToColumn(50); From 3e6442a516deccc9b734a8bfc5904e726481cb36 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 11 Nov 2025 18:31:43 -0800 Subject: [PATCH 19/32] [WebAssembly] Use MCRegister::id(). 
NFC (#167609) --- .../WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp | 4 ++-- .../WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index d8bfed9dc0390..651f631c1ee55 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -317,8 +317,8 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - unsigned WAReg = Op.getReg(); - if (int(WAReg) >= 0) + MCRegister WAReg = Op.getReg(); + if (int(WAReg.id()) >= 0) printRegName(O, WAReg); else if (OpNo >= Desc.getNumDefs() && !IsVariadicDef) O << "$pop" << WebAssembly::getWARegStackId(WAReg); diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index fe9a4bada2430..5dc0e3aa91622 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -608,9 +608,9 @@ inline bool isLocalTee(unsigned Opc) { static const unsigned UnusedReg = -1u; // For a given stackified WAReg, return the id number to print with push/pop. -unsigned inline getWARegStackId(unsigned Reg) { - assert(Reg & INT32_MIN); - return Reg & INT32_MAX; +unsigned inline getWARegStackId(MCRegister Reg) { + assert(Reg.id() & INT32_MIN); + return Reg.id() & INT32_MAX; } } // end namespace WebAssembly From cf35502dd569d842a860696650a783f896db0648 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Tue, 11 Nov 2025 18:31:57 -0800 Subject: [PATCH 20/32] [libunwind] Fix execution flow imbalance when using C++ Exceptions (#165066) --- libunwind/src/Registers.hpp | 16 +- libunwind/src/UnwindCursor.hpp | 66 ++++++- libunwind/src/UnwindLevel1.c | 24 ++- libunwind/src/UnwindRegistersRestore.S | 15 +- libunwind/src/assembly.h | 4 + libunwind/src/config.h | 9 + libunwind/src/libunwind.cpp | 22 ++- libunwind/src/libunwind_ext.h | 6 +- .../Python/lldbsuite/test/decorators.py | 25 +++ .../unwind/libunwind_ret_injection/Makefile | 6 + .../TestLibUnwindRetInjection.py | 177 ++++++++++++++++++ .../unwind/libunwind_ret_injection/main.cpp | 45 +++++ 12 files changed, 396 insertions(+), 19 deletions(-) create mode 100644 lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile create mode 100644 lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py create mode 100644 lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index 9d4c8344150f6..28649fafb23d5 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -1832,8 +1832,9 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) { /// Registers_arm64 holds the register state of a thread in a 64-bit arm /// process. 
class _LIBUNWIND_HIDDEN Registers_arm64; -extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); extern "C" int64_t __libunwind_Registers_arm64_za_disable(); +extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, + unsigned walkedFrames); #if defined(_LIBUNWIND_USE_GCS) extern "C" void *__libunwind_shstk_get_jump_target() { @@ -1861,10 +1862,17 @@ class _LIBUNWIND_HIDDEN Registers_arm64 { v128 getVectorRegister(int num) const; void setVectorRegister(int num, v128 value); static const char *getRegisterName(int num); - void jumpto() { - zaDisable(); - __libunwind_Registers_arm64_jumpto(this); +#ifdef _LIBUNWIND_TRACE_RET_INJECT + _LIBUNWIND_TRACE_NO_INLINE + void returnto(unsigned walkedFrames) { + __libunwind_Registers_arm64_jumpto(this, walkedFrames); + } +#else + void jumpto() { + zaDisable(); + __libunwind_Registers_arm64_jumpto(this, 0); } +#endif static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 7ec5f9e91578a..d7348254af07b 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -472,7 +472,9 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor { virtual void getInfo(unw_proc_info_t *) { _LIBUNWIND_ABORT("getInfo not implemented"); } - virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } + _LIBUNWIND_TRACE_NO_INLINE virtual void jumpto() { + _LIBUNWIND_ABORT("jumpto not implemented"); + } virtual bool isSignalFrame() { _LIBUNWIND_ABORT("isSignalFrame not implemented"); } @@ -489,6 +491,12 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor { virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); } #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + virtual void setWalkedFrames(unsigned) { + _LIBUNWIND_ABORT("setWalkedFrames not implemented"); + } +#endif + #ifdef _AIX virtual uintptr_t getDataRelBase() { _LIBUNWIND_ABORT("getDataRelBase not implemented"); @@ -965,7 +973,8 @@ class UnwindCursor : public AbstractUnwindCursor{ virtual void setFloatReg(int, unw_fpreg_t); virtual int step(bool stage2 = false); virtual void getInfo(unw_proc_info_t *); - virtual void jumpto(); + _LIBUNWIND_TRACE_NO_INLINE + virtual void jumpto(); virtual bool isSignalFrame(); virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); @@ -974,6 +983,10 @@ class UnwindCursor : public AbstractUnwindCursor{ virtual void saveVFPAsX(); #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + virtual void setWalkedFrames(unsigned); +#endif + #ifdef _AIX virtual uintptr_t getDataRelBase(); #endif @@ -1356,6 +1369,9 @@ class UnwindCursor : public AbstractUnwindCursor{ defined(_LIBUNWIND_TARGET_HAIKU) bool _isSigReturn = false; #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + uint32_t _walkedFrames; +#endif }; @@ -1410,7 +1426,46 @@ void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { } template void UnwindCursor::jumpto() { +#ifdef _LIBUNWIND_TRACE_RET_INJECT + /* + + The value of `_walkedFrames` is computed in `unwind_phase2` and represents the + number of frames walked starting `unwind_phase2` to get to the landing pad. + + ``` + // uc is initialized by __unw_getcontext in the parent frame. + // The first stack frame walked is unwind_phase2. 
+ unsigned framesWalked = 1; + ``` + + To that, we need to add the number of function calls in libunwind between + `unwind_phase2` & `__libunwind_Registers_arm64_jumpto` which performs the long + jump, to rebalance the execution flow. + + ``` + frame #0: libunwind.1.dylib`__libunwind_Registers_arm64_jumpto at UnwindRegistersRestore.S:646 + frame #1: libunwind.1.dylib`libunwind::Registers_arm64::returnto at Registers.hpp:2291:3 + frame #2: libunwind.1.dylib`libunwind::UnwindCursor::jumpto at UnwindCursor.hpp:1474:14 + frame #3: libunwind.1.dylib`__unw_resume at libunwind.cpp:375:7 + frame #4: libunwind.1.dylib`__unw_resume_with_frames_walked at libunwind.cpp:363:10 + frame #5: libunwind.1.dylib`unwind_phase2 at UnwindLevel1.c:328:9 + frame #6: libunwind.1.dylib`_Unwind_RaiseException at UnwindLevel1.c:480:10 + frame #7: libc++abi.dylib`__cxa_throw at cxa_exception.cpp:295:5 + ... + ``` + + If we look at the backtrace from `__libunwind_Registers_arm64_jumpto`, we see + there are 5 frames on the stack to reach `unwind_phase2`. However, only 4 of + them will never return, since `__libunwind_Registers_arm64_jumpto` returns + back to the landing pad, so we need to subtract 1 to the number of + `_EXTRA_LIBUNWIND_FRAMES_WALKED`. + */ + + static constexpr size_t _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1; + _registers.returnto(_walkedFrames + _EXTRA_LIBUNWIND_FRAMES_WALKED); +#else _registers.jumpto(); +#endif } #ifdef __arm__ @@ -1419,6 +1474,13 @@ template void UnwindCursor::saveVFPAsX() { } #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT +template +void UnwindCursor::setWalkedFrames(unsigned walkedFrames) { + _walkedFrames = walkedFrames; +} +#endif + #ifdef _AIX template uintptr_t UnwindCursor::getDataRelBase() { diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c index b0cd60dfb9141..79398bac8b531 100644 --- a/libunwind/src/UnwindLevel1.c +++ b/libunwind/src/UnwindLevel1.c @@ -48,16 +48,15 @@ // avoided when invoking the `jumpto()` function. To do this, we use inline // assemblies to "goto" the `jumpto()` for these architectures. 
#if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - (void)fn; \ - __unw_resume((cursor)); \ + __unw_resume_with_frames_walked((cursor), (payload)); \ } while (0) #elif defined(_LIBUNWIND_TARGET_I386) #define __shstk_step_size (4) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("push %%edi\n\t" \ @@ -67,9 +66,9 @@ } while (0) #elif defined(_LIBUNWIND_TARGET_X86_64) #define __shstk_step_size (8) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("jmpq *%%rdx\n\t" ::"D"(shstkRegContext), \ @@ -77,16 +76,17 @@ } while (0) #elif defined(_LIBUNWIND_TARGET_AARCH64) #define __shstk_step_size (8) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("mov x0, %0\n\t" \ + "mov x1, wzr\n\t" \ "br %1\n\t" \ : \ : "r"(shstkRegContext), "r"(shstkJumpAddress) \ - : "x0"); \ + : "x0", "x1"); \ } while (0) #endif @@ -205,6 +205,8 @@ extern int __unw_step_stage2(unw_cursor_t *); #if defined(_LIBUNWIND_USE_GCS) // Enable the GCS target feature to permit gcspop instructions to be used. __attribute__((target("+gcs"))) +#else +_LIBUNWIND_TRACE_NO_INLINE #endif static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, @@ -349,6 +351,8 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, #if defined(_LIBUNWIND_USE_GCS) // Enable the GCS target feature to permit gcspop instructions to be used. 
__attribute__((target("+gcs"))) +#else +_LIBUNWIND_TRACE_NO_INLINE #endif static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index fd306ed8c5230..76a80344034f7 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -645,13 +645,26 @@ Lnovec: #endif // -// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); +// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, unsigned); // // On entry: // thread_state pointer is in x0 +// walked_frames counter is in x1 // .p2align 2 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) + + #if defined(_LIBUNWIND_TRACE_RET_INJECT) + cbz w1, 1f + 0: + subs w1, w1, #1 + adr x16, #8 + ret x16 + + b.ne 0b + 1: + #endif + // skip restore of x0,x1 for now ldp x2, x3, [x0, #0x010] ldp x4, x5, [x0, #0x020] diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h index f0fcd006f2073..84c9d526f1d75 100644 --- a/libunwind/src/assembly.h +++ b/libunwind/src/assembly.h @@ -132,6 +132,10 @@ #if defined(__APPLE__) +#if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__) +#define _LIBUNWIND_TRACE_RET_INJECT 1 +#endif + #define SYMBOL_IS_FUNC(name) #define HIDDEN_SYMBOL(name) .private_extern name #if defined(_LIBUNWIND_HIDE_SYMBOLS) diff --git a/libunwind/src/config.h b/libunwind/src/config.h index deb5a4d4d73d4..f017403fa2234 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -28,6 +28,9 @@ #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif + #if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__) + #define _LIBUNWIND_TRACE_RET_INJECT 1 + #endif #elif defined(_WIN32) #ifdef __SEH__ #define _LIBUNWIND_SUPPORT_SEH_UNWIND 1 @@ -61,6 +64,12 @@ #endif #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT +#define _LIBUNWIND_TRACE_NO_INLINE __attribute__((noinline, disable_tail_calls)) +#else +#define _LIBUNWIND_TRACE_NO_INLINE +#endif + #if defined(_LIBUNWIND_HIDE_SYMBOLS) // The CMake file passes -fvisibility=hidden to control ELF/Mach-O visibility. #define _LIBUNWIND_EXPORT diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 951d87db868bc..3a94b6cf0cc5c 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -247,7 +247,27 @@ _LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor, } _LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info) -/// Resume execution at cursor position (aka longjump). +/// Rebalance the execution flow by injecting the right amount of `ret` +/// instruction relatively to the amount of `walkedFrames` then resume execution +/// at cursor position (aka longjump). +_LIBUNWIND_HIDDEN int __unw_resume_with_frames_walked(unw_cursor_t *cursor, + unsigned walkedFrames) { + _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p, walkedFrames=%u)", + static_cast(cursor), walkedFrames); +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) + // Inform the ASan runtime that now might be a good time to clean stuff up. + __asan_handle_no_return(); +#endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->setWalkedFrames(walkedFrames); +#endif + return __unw_resume(cursor); +} +_LIBUNWIND_WEAK_ALIAS(__unw_resume_with_frames_walked, + unw_resume_with_frames_walked) + +/// Legacy function. Resume execution at cursor position (aka longjump). 
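+/// Callers that rely on the `ret`-injection rebalancing should call
+/// `__unw_resume_with_frames_walked` instead; this legacy entry point resumes
+/// without recording a walked-frame count on the cursor.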
_LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast(cursor)); #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h index 28db43a4f6eef..900e8101f81f1 100644 --- a/libunwind/src/libunwind_ext.h +++ b/libunwind/src/libunwind_ext.h @@ -30,7 +30,11 @@ extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *); extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *); extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t); extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t); -extern int __unw_resume(unw_cursor_t *); +_LIBUNWIND_TRACE_NO_INLINE + extern int __unw_resume_with_frames_walked(unw_cursor_t *, unsigned); +// `__unw_resume` is a legacy function. Use `__unw_resume_with_frames_walked` instead. +_LIBUNWIND_TRACE_NO_INLINE + extern int __unw_resume(unw_cursor_t *); #ifdef __arm__ /* Save VFP registers in FSTMX format (instead of FSTMD). */ diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 454196e1b0264..23d2165e07f7e 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -647,6 +647,31 @@ def is_out_of_tree_debugserver(): return skipTestIfFn(is_out_of_tree_debugserver)(func) +def skipIfOutOfTreeLibunwind(func): + """Decorate the item to skip tests if libunwind was not built in-tree.""" + + def is_out_of_tree_libunwind(): + if not configuration.llvm_tools_dir: + return "out-of-tree libunwind" + + # llvm_tools_dir is typically /bin, so lib is a sibling. + llvm_lib_dir = os.path.join( + os.path.dirname(configuration.llvm_tools_dir), "lib" + ) + + if not os.path.isdir(llvm_lib_dir): + return "out-of-tree libunwind" + + # Check for libunwind library (any extension). + for filename in os.listdir(llvm_lib_dir): + if filename.startswith("libunwind.") or filename.startswith("unwind."): + return None + + return "out-of-tree libunwind" + + return skipTestIfFn(is_out_of_tree_libunwind)(func) + + def skipIfRemote(func): """Decorate the item to skip tests if testing remotely.""" return unittest.skipIf(lldb.remote_platform, "skip on remote platform")(func) diff --git a/lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile new file mode 100644 index 0000000000000..4698eaa815b83 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp + +# Build with C++ exceptions enabled +CXXFLAGS := -g -O0 -fexceptions + +include Makefile.rules diff --git a/lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py new file mode 100644 index 0000000000000..e03234d1b5077 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/TestLibUnwindRetInjection.py @@ -0,0 +1,177 @@ +""" +Test that libunwind correctly injects 'ret' instructions to rebalance execution flow +when unwinding C++ exceptions. This is important for Apple Processor Trace analysis. 
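+
+The test breaks in __libunwind_Registers_arm64_jumpto (loaded from the
+just-built libunwind via DYLD_INSERT_LIBRARIES), reads the walked-frames count
+from x1, then single-steps the ret-injection loop and checks that exactly that
+many 'ret' instructions execute before register restoration (the first 'ldp')
+begins.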
+""" + +import lldb +import os +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +from lldbsuite.test import configuration + + +class LibunwindRetInjectionTestCase(TestBase): + @skipIf(archs=no_match(["arm64", "arm64e", "aarch64"])) + @skipUnlessDarwin + @skipIfOutOfTreeLibunwind + def test_ret_injection_on_exception_unwind(self): + """Test that __libunwind_Registers_arm64_jumpto receives correct walkedFrames count and injects the right number of ret instructions.""" + self.build() + + exe = self.getBuildArtifact("a.out") + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, VALID_TARGET) + + # Find the just-built libunwind, not the system one. + # llvm_tools_dir is typically /bin, so lib is a sibling. + self.assertIsNotNone( + configuration.llvm_tools_dir, + "llvm_tools_dir must be set to find in-tree libunwind", + ) + + llvm_lib_dir = os.path.join( + os.path.dirname(configuration.llvm_tools_dir), "lib" + ) + + # Find the libunwind library (platform-agnostic). + libunwind_path = None + for filename in os.listdir(llvm_lib_dir): + if filename.startswith("libunwind.") or filename.startswith("unwind."): + libunwind_path = os.path.join(llvm_lib_dir, filename) + break + + self.assertIsNotNone( + libunwind_path, f"Could not find libunwind in {llvm_lib_dir}" + ) + + # Set breakpoint in __libunwind_Registers_arm64_jumpto. + # This is the function that performs the actual jump and ret injection. + bp = target.BreakpointCreateByName("__libunwind_Registers_arm64_jumpto") + self.assertTrue(bp.IsValid()) + self.assertGreater(bp.GetNumLocations(), 0) + + # Set up DYLD_INSERT_LIBRARIES to use the just-built libunwind. + launch_info = lldb.SBLaunchInfo(None) + env = target.GetEnvironment() + env.Set("DYLD_INSERT_LIBRARIES", libunwind_path, True) + launch_info.SetEnvironment(env, False) + + # Launch the process with our custom libunwind. + error = lldb.SBError() + process = target.Launch(launch_info, error) + self.assertSuccess( + error, f"Failed to launch process with libunwind at {libunwind_path}" + ) + self.assertTrue(process, PROCESS_IS_VALID) + + # We should hit the breakpoint in __libunwind_Registers_arm64_jumpto + # during the exception unwinding phase 2. + threads = lldbutil.get_threads_stopped_at_breakpoint(process, bp) + self.assertEqual(len(threads), 1, "Should have stopped at breakpoint") + + thread = threads[0] + frame = thread.GetFrameAtIndex(0) + + # Verify we're in __libunwind_Registers_arm64_jumpto. + function_name = frame.GetFunctionName() + self.assertTrue( + "__libunwind_Registers_arm64_jumpto" in function_name, + f"Expected to be in __libunwind_Registers_arm64_jumpto, got {function_name}", + ) + + # On ARM64, the walkedFrames parameter should be in register x1 (second parameter). + # According to the ARM64 calling convention, integer arguments are passed in x0-x7. + # x0 = Registers_arm64* pointer. + # x1 = unsigned walkedFrames. + error = lldb.SBError() + x1_value = frame.register["x1"].GetValueAsUnsigned(error) + self.assertSuccess(error, "Failed to read x1 register") + + # According to the code in UnwindCursor.hpp, the walkedFrames value represents: + # 1. The number of frames walked in unwind_phase2 to reach the landing pad. + # 2. Plus _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1 = 4 additional libunwind frames. 
+ # + # From the comment in the code: + # frame #0: __libunwind_Registers_arm64_jumpto + # frame #1: Registers_arm64::returnto + # frame #2: UnwindCursor::jumpto + # frame #3: __unw_resume + # frame #4: __unw_resume_with_frames_walked + # frame #5: unwind_phase2 + # + # Since __libunwind_Registers_arm64_jumpto returns to the landing pad, + # we subtract 1, so _EXTRA_LIBUNWIND_FRAMES_WALKED = 4. + # + # For our test program: + # - unwind_phase2 starts walking (frame 0 counted here). + # - Walks through: func_d (throw site), func_c, func_b, func_a. + # - Finds landing pad in main. + # That's approximately 4-5 frames from the user code. + # Plus the 4 extra libunwind frames. + # + # So we expect x1 to be roughly 8-10. + expected_min_frames = 8 + expected_max_frames = 13 # Allow some variation for libc++abi frames. + + self.assertGreaterEqual( + x1_value, + expected_min_frames, + f"walkedFrames (x1) should be >= {expected_min_frames}, got {x1_value}. " + "This is the number of 'ret' instructions that will be executed.", + ) + + self.assertLessEqual( + x1_value, + expected_max_frames, + f"walkedFrames (x1) should be <= {expected_max_frames}, got {x1_value}. " + "Value seems too high.", + ) + + # Now step through the ret injection loop and count the actual number of 'ret' executions. + # The loop injects exactly x1_value ret instructions before continuing with register restoration. + # We step until we hit the first 'ldp' instruction (register restoration starts with 'ldp x2, x3, [x0, #0x010]'). + ret_executed_count = 0 + max_steps = 100 # Safety limit to prevent infinite loops. + + for step_count in range(max_steps): + # Get current instruction. + pc = frame.GetPC() + inst = process.ReadMemory(pc, 4, lldb.SBError()) + + # Disassemble current instruction. + current_inst = target.GetInstructions(lldb.SBAddress(pc, target), inst)[0] + mnemonic = current_inst.GetMnemonic(target) + operands = current_inst.GetOperands(target) + + # Check if we've reached the register restoration part (first ldp after the loop). + if mnemonic == "ldp": + # We've exited the ret injection loop. + break + + # Count 'ret' instructions that get executed. + if mnemonic == "ret": + self.assertEqual(operands, "x16") + ret_executed_count += 1 + + # Step one instruction. + thread.StepInstruction(False) # False = step over. + + # Update frame reference. + frame = thread.GetFrameAtIndex(0) + + # Verify we didn't hit the safety limit. + self.assertLess( + step_count, + max_steps - 1, + f"Stepped {max_steps} times without reaching 'ldp' instruction. Something is wrong.", + ) + + # The number of executed 'ret' instructions should match x1_value. + # According to the implementation, the loop executes exactly x1_value times. + self.assertEqual( + ret_executed_count, + x1_value, + f"Expected {x1_value} 'ret' instructions to be executed (matching x1 register), " + f"but counted {ret_executed_count} executed 'ret' instructions.", + ) diff --git a/lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp new file mode 100644 index 0000000000000..00685e4d6b137 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/libunwind_ret_injection/main.cpp @@ -0,0 +1,45 @@ +// Test program to verify libunwind ret injection feature for execution flow +// rebalancing. +// +// This test creates a multi-frame call stack and throws a C++ exception to +// trigger libunwind's two-phase exception handling. 
The test verifies that +// libunwind correctly injects the right amount of 'ret' instructions to +// rebalance the execution flow when returning to the landing pad, which is +// important for Apple Processor Trace analysis. + +#include +#include +#include + +// Marker functions with noinline to ensure they appear in the stack. +static void __attribute__((noinline)) func_d() { + printf("In func_d, about to throw exception\n"); + throw std::runtime_error("test exception"); +} + +static void __attribute__((noinline)) func_c() { + printf("In func_c\n"); + func_d(); +} + +static void __attribute__((noinline)) func_b() { + printf("In func_b\n"); + func_c(); +} + +static void __attribute__((noinline)) func_a() { + printf("In func_a\n"); + func_b(); +} + +int main(int argc, char *argv[]) { + try { + printf("In main, about to call func_a\n"); + func_a(); + printf("ERROR: Should not reach here\n"); + return 1; + } catch (const std::exception &e) { + printf("Caught exception in main: %s\n", e.what()); + return 0; + } +} From 95f2728b5cdaf03c5f0c13983903f8e7b50b22b4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 18:47:28 -0800 Subject: [PATCH 21/32] DAG: Stop using TargetLibraryInfo for multi-result FP intrinsic codegen (#166987) Only use RuntimeLibcallsInfo. Remove the helper functions used to transition. --- llvm/include/llvm/CodeGen/SelectionDAG.h | 10 +--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 3 +- .../SelectionDAG/LegalizeVectorOps.cpp | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 60 ++++--------------- 5 files changed, 16 insertions(+), 65 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 62d2f222110e4..5b331e4444915 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1725,17 +1725,9 @@ class SelectionDAG { /// value. LLVM_ABI bool expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node, - SmallVectorImpl &Results, EVT CallType, + SmallVectorImpl &Results, std::optional CallRetResNo = {}); - // FIXME: Ths should be removed, and form using RTLIB::Libcall should be - // preferred. Callers should resolve the exact type libcall to use. - LLVM_ABI bool - expandMultipleResultFPLibCall(StringRef LibcallName, CallingConv::ID CC, - SDNode *Node, SmallVectorImpl &Results, - std::optional CallRetResNo = {}, - bool IsVectorMasked = false); - /// Expand the specified \c ISD::VAARG node as the Legalize pass would. LLVM_ABI SDValue expandVAArg(SDNode *Node); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a0baf821698a8..3ed84af6a8717 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4842,7 +4842,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT); + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); if (!Expanded) { DAG.getContext()->emitError(Twine("no libcall available for ") + Node->getOperationName(&DAG)); @@ -4940,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? 
RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, + bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 29c4dac12a81a..58983cb57d7f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1726,8 +1726,7 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0), - CallRetResNo); + DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index a7ae794459331..c55e55df373e9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1275,7 +1275,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) + DAG.expandMultipleResultFPLibCall(LC, Node, Results)) return; // TODO: Try to see if there's a narrower call available to use before @@ -1286,7 +1286,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, + DAG.expandMultipleResultFPLibCall(LC, Node, Results, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b5d502b90c90c..f05266967fb68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2514,56 +2514,14 @@ static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, bool SelectionDAG::expandMultipleResultFPLibCall( RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl &Results, - EVT CallVT, std::optional CallRetResNo) { + std::optional CallRetResNo) { if (LC == RTLIB::UNKNOWN_LIBCALL) return false; - EVT VT = Node->getValueType(0); - - RTLIB::LibcallImpl Impl = TLI->getLibcallImpl(LC); - if (Impl == RTLIB::Unsupported) - return false; - - StringRef LCName = TLI->getLibcallImplName(Impl); - - // FIXME: This should not use TargetLibraryInfo. There should be - // RTLIB::Libcall entries for each used vector type, and directly matched. - auto getVecDesc = [&]() -> VecDesc const * { - for (bool Masked : {false, true}) { - if (VecDesc const *VD = getLibInfo().getVectorMappingInfo( - LCName, VT.getVectorElementCount(), Masked)) { - return VD; - } - } - return nullptr; - }; - - // For vector types, we must find a vector mapping for the libcall. 
- VecDesc const *VD = nullptr; - if (VT.isVector() && !CallVT.isVector() && !(VD = getVecDesc())) + RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LC); + if (LibcallImpl == RTLIB::Unsupported) return false; - bool IsMasked = (VD && VD->isMasked()) || - RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl); - - // This wrapper function exists because getVectorMappingInfo works in terms of - // function names instead of RTLIB enums. - - // FIXME: If we used a vector mapping, this assumes the calling convention of - // the vector function is the same as the scalar. - - StringRef Name = VD ? VD->getVectorFnName() : LCName; - - return expandMultipleResultFPLibCall(Name, - TLI->getLibcallImplCallingConv(Impl), - Node, Results, CallRetResNo, IsMasked); -} - -// FIXME: This belongs in TargetLowering -bool SelectionDAG::expandMultipleResultFPLibCall( - StringRef Name, CallingConv::ID CC, SDNode *Node, - SmallVectorImpl &Results, std::optional CallRetResNo, - bool IsMasked) { LLVMContext &Ctx = *getContext(); EVT VT = Node->getValueType(0); unsigned NumResults = Node->getNumValues(); @@ -2624,8 +2582,8 @@ bool SelectionDAG::expandMultipleResultFPLibCall( SDLoc DL(Node); - // Pass the vector mask (if required). - if (IsMasked) { + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + // Pass the vector mask (if required). EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); @@ -2636,10 +2594,12 @@ bool SelectionDAG::expandMultipleResultFPLibCall( : Type::getVoidTy(Ctx); SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); SDValue Callee = - getExternalSymbol(Name.data(), TLI->getPointerTy(getDataLayout())); + getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), + TLI->getPointerTy(getDataLayout())); TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(CC, RetType, Callee, - std::move(Args)); + CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( + TLI->getLibcallImplCallingConv(LibcallImpl), RetType, Callee, + std::move(Args)); auto [Call, CallChain] = TLI->LowerCallTo(CLI); From 389a23c538e33b30903b4e733f5d30f629199829 Mon Sep 17 00:00:00 2001 From: William Tran-Viet Date: Tue, 11 Nov 2025 22:00:08 -0500 Subject: [PATCH 22/32] [libc++] Implement P2988R12: `std::optional` (#155202) Resolves #148131 - Unlock `std::optional` implementation - Allow instantiations of `optional` and `optional` but disables `value_or()` and `optional::iterator` + all `iterator` related functions - Update documentation - Update tests --- libcxx/docs/FeatureTestMacroTable.rst | 2 + libcxx/docs/ReleaseNotes/22.rst | 1 + libcxx/docs/Status/Cxx2cPapers.csv | 2 +- libcxx/include/__iterator/wrap_iter.h | 4 +- libcxx/include/optional | 326 ++++++++++++------ libcxx/include/version | 5 +- libcxx/modules/std/optional.inc | 3 +- .../iterator.compile.pass.cpp | 9 +- .../value_or.compile.pass.cpp | 28 ++ .../optional.version.compile.pass.cpp | 4 +- .../version.version.compile.pass.cpp | 4 +- .../optional/optional.iterator/begin.pass.cpp | 7 +- .../borrowed_range.compile.pass.cpp | 34 ++ .../optional/optional.iterator/end.pass.cpp | 8 +- .../optional.iterator/iterator.pass.cpp | 40 ++- .../optional.monadic/and_then.pass.cpp | 87 +++++ .../optional.monadic/or_else.pass.cpp | 26 ++ .../optional.monadic/transform.pass.cpp | 131 ++++++- .../assign_value.pass.cpp | 56 ++- .../optional.object.assign/emplace.pass.cpp | 27 +- 
.../optional.object.ctor/ctor.verify.cpp | 24 +- .../optional.object.ctor/move.pass.cpp | 124 +++---- .../ref_constructs_from_temporary.verify.cpp | 35 ++ .../optional.object.ctor/ref_t.pass.cpp | 75 ++++ .../optional.object.dtor/dtor.pass.cpp | 22 +- .../optional.object.mod/reset.pass.cpp | 13 +- .../dereference.pass.cpp | 14 +- .../dereference_const.pass.cpp | 19 + .../has_value.pass.cpp | 9 +- .../optional.object.observe/op_arrow.pass.cpp | 30 +- .../op_arrow_const.pass.cpp | 19 + .../optional.object.observe/value.pass.cpp | 8 + .../optional.object.observe/value_or.pass.cpp | 8 + .../value_or_const.pass.cpp | 10 +- .../optional.object.swap/swap.pass.cpp | 77 ++++- ...al_requires_destructible_object.verify.cpp | 12 +- .../optional/optional.object/types.pass.cpp | 9 +- .../optional.specalg/make_optional.pass.cpp | 4 +- .../make_optional_explicit.pass.cpp | 29 +- .../optional/optional.specalg/swap.pass.cpp | 76 +++- .../generate_feature_test_macro_components.py | 1 + 41 files changed, 1176 insertions(+), 246 deletions(-) create mode 100644 libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp create mode 100644 libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp create mode 100644 libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp create mode 100644 libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index d5ed9188b1b23..756bdf71f8b22 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -486,6 +486,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_not_fn`` ``202306L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_optional`` ``202506L`` + ---------------------------------------------------------- ----------------- ``__cpp_lib_optional_range_support`` ``202406L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_out_ptr`` ``202311L`` diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst index a6a0ac8670fb5..2c19dfc57a3f8 100644 --- a/libcxx/docs/ReleaseNotes/22.rst +++ b/libcxx/docs/ReleaseNotes/22.rst @@ -40,6 +40,7 @@ Implemented Papers - P2321R2: ``zip`` (`Github `__) (The paper is partially implemented. 
``zip_transform_view`` is implemented in this release) +- P2988R12: ``std::optional`` (`Github `__) - P3044R2: sub-``string_view`` from ``string`` (`Github `__) - P3223R2: Making ``std::istream::ignore`` less surprising (`Github `__) - P3060R3: Add ``std::views::indices(n)`` (`Github `__) diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index 0f4dbb882088a..0455643446f8e 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -122,7 +122,7 @@ "`P3293R3 `__","Splicing a base class subobject","2025-06 (Sofia)","","","`#148125 `__","" "`P3491R3 `__","``define_static_{string,object,array}``","2025-06 (Sofia)","","","`#148126 `__","" "`P3096R12 `__","Function Parameter Reflection in Reflection for C++26","2025-06 (Sofia)","","","`#148127 `__","" -"`P2988R12 `__","``std::optional``","2025-06 (Sofia)","","","`#148131 `__","" +"`P2988R12 `__","``std::optional``","2025-06 (Sofia)","|Complete|","22","`#148131 `__","" "`P3348R4 `__","C++26 should refer to C23 not C17","2025-06 (Sofia)","","","`#148133 `__","" "`P3037R6 `__","``constexpr`` ``std::shared_ptr`` and friends","2025-06 (Sofia)","","","`#148135 `__","" "`P3284R4 `__","``write_env`` and ``unstoppable`` Sender Adaptors","2025-06 (Sofia)","","","`#148136 `__","" diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h index d18d9682da449..98745f600a6ec 100644 --- a/libcxx/include/__iterator/wrap_iter.h +++ b/libcxx/include/__iterator/wrap_iter.h @@ -117,8 +117,8 @@ class __wrap_iter { friend class span; template friend struct array; - template - friend class optional; + template + friend struct __optional_iterator; }; template diff --git a/libcxx/include/optional b/libcxx/include/optional index a3023622e2067..ad672f6a9914f 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -210,6 +210,7 @@ namespace std { # include <__iterator/wrap_iter.h> # include <__memory/addressof.h> # include <__memory/construct_at.h> +# include <__ranges/enable_borrowed_range.h> # include <__ranges/enable_view.h> # include <__tuple/sfinae_helpers.h> # include <__type_traits/add_pointer.h> @@ -239,6 +240,7 @@ namespace std { # include <__type_traits/is_trivially_relocatable.h> # include <__type_traits/is_unbounded_array.h> # include <__type_traits/negation.h> +# include <__type_traits/reference_constructs_from_temporary.h> # include <__type_traits/remove_const.h> # include <__type_traits/remove_cv.h> # include <__type_traits/remove_cvref.h> @@ -409,39 +411,30 @@ struct __optional_storage_base : __optional_destruct_base<_Tp> { __construct(std::forward<_That>(__opt).__get()); } } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_from_val(_Up&& __val) { + this->__get() = std::forward<_Up>(__val); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __swap(__optional_storage_base& __rhs) { + using std::swap; + swap(this->__get(), __rhs.__get()); + } }; -// optional is currently required to be ill-formed. However, it may -// be allowed in the future. For this reason, it has already been implemented -// to ensure we can make the change in an ABI-compatible manner. 
template struct __optional_storage_base<_Tp, true> { using value_type = _Tp; using __raw_type _LIBCPP_NODEBUG = remove_reference_t<_Tp>; __raw_type* __value_; - template - static _LIBCPP_HIDE_FROM_ABI constexpr bool __can_bind_reference() { - using _RawUp = __libcpp_remove_reference_t<_Up>; - using _UpPtr = _RawUp*; - using _RawTp = __libcpp_remove_reference_t<_Tp>; - using _TpPtr = _RawTp*; - using _CheckLValueArg = - integral_constant::value && is_convertible<_UpPtr, _TpPtr>::value) || - is_same<_RawUp, reference_wrapper<_RawTp>>::value || - is_same<_RawUp, reference_wrapper<__remove_const_t<_RawTp>>>::value >; - return (is_lvalue_reference<_Tp>::value && _CheckLValueArg::value) || - (is_rvalue_reference<_Tp>::value && !is_lvalue_reference<_Up>::value && - is_convertible<_UpPtr, _TpPtr>::value); - } - _LIBCPP_HIDE_FROM_ABI constexpr __optional_storage_base() noexcept : __value_(nullptr) {} template _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_storage_base(in_place_t, _UArg&& __uarg) : __value_(std::addressof(__uarg)) { - static_assert(__can_bind_reference<_UArg>(), + static_assert(!__reference_constructs_from_temporary_v<_Tp, _UArg>, "Attempted to construct a reference element in tuple from a " "possible temporary"); } @@ -457,7 +450,7 @@ struct __optional_storage_base<_Tp, true> { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __construct(_UArg&& __val) { _LIBCPP_ASSERT_INTERNAL(!has_value(), "__construct called for engaged __optional_storage"); - static_assert(__can_bind_reference<_UArg>(), + static_assert(!__reference_constructs_from_temporary_v<_Tp, _UArg>, "Attempted to construct a reference element in tuple from a " "possible temporary"); __value_ = std::addressof(__val); @@ -481,6 +474,15 @@ struct __optional_storage_base<_Tp, true> { __construct(std::forward<_That>(__opt).__get()); } } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_from_val(_Up&& __val) noexcept { + __value_ = std::addressof(__val); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __swap(__optional_storage_base& __rhs) noexcept { + std::swap(__value_, __rhs.__value_); + } }; template ::value> @@ -592,6 +594,10 @@ constexpr bool ranges::enable_view> = true; template constexpr range_format format_kind> = range_format::disabled; + +template +constexpr bool ranges::enable_borrowed_range> = true; + # endif # if _LIBCPP_STD_VER >= 20 @@ -606,19 +612,19 @@ struct __is_std_optional : false_type {}; template struct __is_std_optional> : true_type {}; -template -class _LIBCPP_DECLSPEC_EMPTY_BASES optional - : private __optional_move_assign_base<_Tp>, - private __optional_sfinae_ctor_base_t<_Tp>, - private __optional_sfinae_assign_base_t<_Tp> { - using __base _LIBCPP_NODEBUG = __optional_move_assign_base<_Tp>; +template +struct __optional_iterator {}; - using __pointer _LIBCPP_NODEBUG = std::add_pointer_t<_Tp>; - using __const_pointer _LIBCPP_NODEBUG = std::add_pointer_t; +template +struct __optional_iterator< + _Tp, + enable_if_t && is_function_v<__libcpp_remove_reference_t<_Tp>>) && + !(is_lvalue_reference_v<_Tp> && is_array_v<__libcpp_remove_reference_t<_Tp>>)> > { +private: + using __pointer _LIBCPP_NODEBUG = add_pointer_t>; + using __const_pointer _LIBCPP_NODEBUG = add_pointer_t>; public: - using value_type = _Tp; - # if _LIBCPP_STD_VER >= 26 # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL using iterator = __bounded_iter<__wrap_iter<__pointer>>; @@ -627,19 +633,86 @@ public: using iterator = __wrap_iter<__pointer>; using const_iterator = 
__wrap_iter<__const_pointer>; # endif + + // [optional.iterators], iterator support + _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() noexcept { + auto& __derived_self = static_cast&>(*this); + auto __ptr = [&__derived_self]() { + if constexpr (is_lvalue_reference_v<_Tp>) { + return __derived_self.has_value() ? std::addressof(__derived_self.__get()) : nullptr; + } + return std::addressof(__derived_self.__get()); + }(); + +# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL + return std::__make_bounded_iter( + __wrap_iter<__pointer>(__ptr), + __wrap_iter<__pointer>(__ptr), + __wrap_iter<__pointer>(__ptr) + (__derived_self.has_value() ? 1 : 0)); +# else + return iterator(__ptr); +# endif + } + + _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { + auto& __derived_self = static_cast&>(*this); + auto* __ptr = [&__derived_self]() { + if constexpr (is_lvalue_reference_v<_Tp>) { + return __derived_self.has_value() ? std::addressof(__derived_self.__get()) : nullptr; + } + return std::addressof(__derived_self.__get()); + }(); + +# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL + return std::__make_bounded_iter( + __wrap_iter<__const_pointer>(__ptr), + __wrap_iter<__const_pointer>(__ptr), + __wrap_iter<__const_pointer>(__ptr) + (__derived_self.has_value() ? 1 : 0)); +# else + return const_iterator(__ptr); +# endif + } + + _LIBCPP_HIDE_FROM_ABI constexpr iterator end() noexcept { + return begin() + (static_cast&>(*this).has_value() ? 1 : 0); + } + _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { + return begin() + (static_cast&>(*this).has_value() ? 1 : 0); + } # endif +}; + +template +class _LIBCPP_DECLSPEC_EMPTY_BASES optional + : private __optional_move_assign_base<_Tp>, + private __optional_sfinae_ctor_base_t<_Tp>, + private __optional_sfinae_assign_base_t<_Tp>, + public __optional_iterator<_Tp> { + using __base _LIBCPP_NODEBUG = __optional_move_assign_base<_Tp>; + +public: + using value_type = __libcpp_remove_reference_t<_Tp>; + using __trivially_relocatable _LIBCPP_NODEBUG = conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value, optional, void>; private: - // Disable the reference extension using this static assert. 
- static_assert(!is_same_v<__remove_cvref_t, in_place_t>, + static_assert(!is_same_v<__remove_cvref_t<_Tp>, in_place_t>, "instantiation of optional with in_place_t is ill-formed"); - static_assert(!is_same_v<__remove_cvref_t, nullopt_t>, - "instantiation of optional with nullopt_t is ill-formed"); - static_assert(!is_reference_v, "instantiation of optional with a reference type is ill-formed"); - static_assert(is_destructible_v, "instantiation of optional with a non-destructible type is ill-formed"); - static_assert(!is_array_v, "instantiation of optional with an array type is ill-formed"); + static_assert(!is_same_v<__remove_cvref_t<_Tp>, nullopt_t>, "instantiation of optional with nullopt_t is ill-formed"); +# if _LIBCPP_STD_VER >= 26 + static_assert(!is_rvalue_reference_v<_Tp>, "instantiation of optional with an rvalue reference type is ill-formed"); +# else + static_assert(!is_reference_v<_Tp>, "instantiation of optional with a reference type is ill-formed"); +# endif + static_assert(is_destructible_v<_Tp>, "instantiation of optional with a non-destructible type is ill-formed"); + static_assert(!is_array_v<_Tp>, "instantiation of optional with an array type is ill-formed"); + +# if _LIBCPP_STD_VER >= 26 + template + constexpr static bool __libcpp_opt_ref_ctor_deleted = + is_lvalue_reference_v<_Tp> && reference_constructs_from_temporary_v<_Tp, _Up>; +# endif // LWG2756: conditionally explicit conversion from _Up struct _CheckOptionalArgsConstructor { @@ -714,18 +787,15 @@ public: template , is_constructible>::value, int> = 0> + enable_if_t<_And<_IsSame<_InPlaceT, in_place_t>, is_constructible<_Tp, _Args...>>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_InPlaceT, _Args&&... __args) : __base(in_place, std::forward<_Args>(__args)...) {} - template &, _Args...>, int> = 0> + template &, _Args...>, int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(in_place_t, initializer_list<_Up> __il, _Args&&... __args) : __base(in_place, __il, std::forward<_Args>(__args)...) {} - template ::template __enable_implicit<_Up>(), int> = 0> + template ::template __enable_implicit<_Up>(), int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr optional(_Up&& __v) : __base(in_place, std::forward<_Up>(__v)) {} template , @@ -752,6 +822,38 @@ public: this->__construct_from(std::move(__v)); } + // deleted optional constructors +# if _LIBCPP_STD_VER >= 26 + template &, _Args...>, int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(in_place_t, initializer_list<_Up>, _Args&&...) 
= delete; + + template ::template __enable_implicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + optional(_Up&&) = delete; + + template , + enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_explicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(_Up&&) = delete; + + template ::template __enable_implicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + optional(const optional<_Up>&) = delete; + + template ::template __enable_explicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(const optional<_Up>&) = delete; + + template ::template __enable_implicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + optional(optional<_Up>&&) = delete; + + template ::template __enable_explicit<_Up>(), int> = 0> + requires __libcpp_opt_ref_ctor_deleted<_Up> + explicit optional(optional<_Up>&&) = delete; +# endif + # if _LIBCPP_STD_VER >= 23 template , + template , enable_if_t<_And<_IsNotSame<__remove_cvref_t<_Up>, optional>, - _Or<_IsNotSame<__remove_cvref_t<_Up>, value_type>, _Not>>, - is_constructible, - is_assignable>::value, + _Or<_IsNotSame<__remove_cvref_t<_Up>, _Tp>, _Not>>, + is_constructible<_Tp, _Up>, + is_assignable<_Tp&, _Up>>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional& operator=(_Up&& __v) { if (this->has_value()) - this->__get() = std::forward<_Up>(__v); + this->__assign_from_val(std::forward<_Up>(__v)); else this->__construct(std::forward<_Up>(__v)); return *this; @@ -798,7 +900,7 @@ public: return *this; } - template , int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& emplace(_Args&&... __args) { reset(); this->__construct(std::forward<_Args>(__args)...); @@ -807,7 +909,12 @@ public: template &, _Args...>, int> = 0> + enable_if_t&, _Args...> +# if _LIBCPP_STD_VER >= 26 + && !reference_constructs_from_temporary_v<_Tp&, _Up> +# endif + , + int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& emplace(initializer_list<_Up> __il, _Args&&... __args) { reset(); this->__construct(__il, std::forward<_Args>(__args)...); @@ -815,11 +922,10 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void - swap(optional& __opt) noexcept(is_nothrow_move_constructible_v && is_nothrow_swappable_v) { + swap(optional& __opt) noexcept(is_nothrow_move_constructible_v<_Tp> && is_nothrow_swappable_v<_Tp>) { if (this->has_value() == __opt.has_value()) { - using std::swap; if (this->has_value()) - swap(this->__get(), __opt.__get()); + this->__swap(__opt); } else { if (this->has_value()) { __opt.__construct(std::move(this->__get())); @@ -831,60 +937,32 @@ public: } } -# if _LIBCPP_STD_VER >= 26 - // [optional.iterators], iterator support - _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() noexcept { -# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL - return std::__make_bounded_iter( - std::__wrap_iter<__pointer>(std::addressof(this->__get())), - std::__wrap_iter<__pointer>(std::addressof(this->__get())), - std::__wrap_iter<__pointer>(std::addressof(this->__get()) + (this->has_value() ? 
1 : 0))); -# else - return iterator(std::addressof(this->__get())); -# endif - } - - _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { -# ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL - return std::__make_bounded_iter( - std::__wrap_iter<__const_pointer>(std::addressof(this->__get())), - std::__wrap_iter<__const_pointer>(std::addressof(this->__get())), - std::__wrap_iter<__const_pointer>(std::addressof(this->__get()) + (this->has_value() ? 1 : 0))); -# else - return const_iterator(std::addressof(this->__get())); -# endif - } - - _LIBCPP_HIDE_FROM_ABI constexpr iterator end() noexcept { return begin() + (this->has_value() ? 1 : 0); } - _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { return begin() + (this->has_value() ? 1 : 0); } -# endif - - _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t operator->() const noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp const> operator->() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator-> called on a disengaged value"); return std::addressof(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t operator->() noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp> operator->() noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator-> called on a disengaged value"); return std::addressof(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr const value_type& operator*() const& noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type& operator*() & noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() & noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type&& operator*() && noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator*() && noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return std::move(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr const value_type&& operator*() const&& noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& operator*() const&& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value"); return std::move(this->__get()); } @@ -894,48 +972,66 @@ public: using __base::__get; using __base::has_value; - _LIBCPP_HIDE_FROM_ABI constexpr value_type const& value() const& { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp const& value() const& { if (!this->has_value()) std::__throw_bad_optional_access(); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type& value() & { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() & { if (!this->has_value()) std::__throw_bad_optional_access(); return this->__get(); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type&& value() && { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& value() && { if (!this->has_value()) std::__throw_bad_optional_access(); return std::move(this->__get()); } - _LIBCPP_HIDE_FROM_ABI constexpr value_type const&& value() const&& { + _LIBCPP_HIDE_FROM_ABI constexpr _Tp const&& value() const&& { if (!this->has_value()) std::__throw_bad_optional_access(); return std::move(this->__get()); } template > - _LIBCPP_HIDE_FROM_ABI constexpr value_type value_or(_Up&& __v) const& { - 
static_assert(is_copy_constructible_v, "optional::value_or: T must be copy constructible"); - static_assert(is_convertible_v<_Up, value_type>, "optional::value_or: U must be convertible to T"); - return this->has_value() ? this->__get() : static_cast(std::forward<_Up>(__v)); +# if _LIBCPP_STD_VER >= 26 + requires(!(is_lvalue_reference_v<_Tp> && is_function_v<__libcpp_remove_reference_t<_Tp>>) && + !(is_lvalue_reference_v<_Tp> && is_array_v<__libcpp_remove_reference_t<_Tp>>)) +# endif + _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) const& { + static_assert(is_copy_constructible_v<_Tp>, "optional::value_or: T must be copy constructible"); + static_assert(is_convertible_v<_Up, _Tp>, "optional::value_or: U must be convertible to T"); + return this->has_value() ? this->__get() : static_cast<_Tp>(std::forward<_Up>(__v)); } template > - _LIBCPP_HIDE_FROM_ABI constexpr value_type value_or(_Up&& __v) && { - static_assert(is_move_constructible_v, "optional::value_or: T must be move constructible"); - static_assert(is_convertible_v<_Up, value_type>, "optional::value_or: U must be convertible to T"); - return this->has_value() ? std::move(this->__get()) : static_cast(std::forward<_Up>(__v)); +# if _LIBCPP_STD_VER >= 26 + requires(!is_lvalue_reference_v<_Tp>) +# endif + _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && { + static_assert(is_move_constructible_v<_Tp>, "optional::value_or: T must be move constructible"); + static_assert(is_convertible_v<_Up, _Tp>, "optional::value_or: U must be convertible to T"); + return this->has_value() ? std::move(this->__get()) : static_cast<_Tp>(std::forward<_Up>(__v)); + } + +# if _LIBCPP_STD_VER >= 26 + template > + requires(is_lvalue_reference_v<_Tp> && + !(is_function_v<__libcpp_remove_reference_t<_Tp>> || is_array_v<__libcpp_remove_reference_t<_Tp>>)) + _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && { + static_assert(is_move_constructible_v<_Tp>, "optional::value_or: T must be move constructible"); + static_assert(is_convertible_v<_Up, _Tp>, "optional::value_or: U must be convertible to T"); + return this->has_value() ? 
this->__get() : static_cast<_Tp>(std::forward<_Up>(__v)); } +# endif # if _LIBCPP_STD_VER >= 23 template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & { - using _Up = invoke_result_t<_Func, value_type&>; + using _Up = invoke_result_t<_Func, _Tp&>; static_assert(__is_std_optional>::value, "Result of f(value()) must be a specialization of std::optional"); if (*this) @@ -945,7 +1041,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& { - using _Up = invoke_result_t<_Func, const value_type&>; + using _Up = invoke_result_t<_Func, const _Tp&>; static_assert(__is_std_optional>::value, "Result of f(value()) must be a specialization of std::optional"); if (*this) @@ -955,7 +1051,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && { - using _Up = invoke_result_t<_Func, value_type&&>; + using _Up = invoke_result_t<_Func, _Tp&&>; static_assert(__is_std_optional>::value, "Result of f(std::move(value())) must be a specialization of std::optional"); if (*this) @@ -965,7 +1061,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& { - using _Up = invoke_result_t<_Func, const value_type&&>; + using _Up = invoke_result_t<_Func, const _Tp&&>; static_assert(__is_std_optional>::value, "Result of f(std::move(value())) must be a specialization of std::optional"); if (*this) @@ -975,7 +1071,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & { - using _Up = remove_cv_t>; + using _Up = remove_cv_t>; static_assert(!is_array_v<_Up>, "Result of f(value()) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(value()) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(value()) should not be std::nullopt_t"); @@ -987,7 +1083,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& { - using _Up = remove_cv_t>; + using _Up = remove_cv_t>; static_assert(!is_array_v<_Up>, "Result of f(value()) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(value()) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(value()) should not be std::nullopt_t"); @@ -999,7 +1095,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && { - using _Up = remove_cv_t>; + using _Up = remove_cv_t>; static_assert(!is_array_v<_Up>, "Result of f(std::move(value())) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(std::move(value())) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(std::move(value())) should not be std::nullopt_t"); @@ -1011,7 +1107,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& { - using _Up = remove_cvref_t>; + using _Up = remove_cvref_t>; static_assert(!is_array_v<_Up>, "Result of f(std::move(value())) should not be an Array"); static_assert(!is_same_v<_Up, in_place_t>, "Result of f(std::move(value())) should not be std::in_place_t"); static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(std::move(value())) should not be std::nullopt_t"); @@ -1023,7 +1119,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr optional or_else(_Func&& __f) const& - requires is_copy_constructible_v + requires is_copy_constructible_v<_Tp> { static_assert(is_same_v>, optional>, "Result of f() should be the same type as this optional"); @@ -1034,7 +1130,7 @@ public: template _LIBCPP_HIDE_FROM_ABI constexpr 
optional or_else(_Func&& __f) && - requires is_move_constructible_v + requires is_move_constructible_v<_Tp> { static_assert(is_same_v>, optional>, "Result of f() should be the same type as this optional"); @@ -1336,7 +1432,15 @@ swap(optional<_Tp>& __x, optional<_Tp>& __y) noexcept(noexcept(__x.swap(__y))) { __x.swap(__y); } -template +struct __make_optional_barrier_tag { + explicit __make_optional_barrier_tag() = default; +}; + +template < +# if _LIBCPP_STD_VER >= 26 + __make_optional_barrier_tag = __make_optional_barrier_tag{}, +# endif + class _Tp> _LIBCPP_HIDE_FROM_ABI constexpr optional> make_optional(_Tp&& __v) { return optional>(std::forward<_Tp>(__v)); } diff --git a/libcxx/include/version b/libcxx/include/version index b0030602f854a..05532ea731ff3 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -187,7 +187,8 @@ __cpp_lib_nonmember_container_access 201411L __cpp_lib_not_fn 202306L 201603L // C++17 __cpp_lib_null_iterators 201304L -__cpp_lib_optional 202110L +__cpp_lib_optional 202506L + 202110L // C++23 202106L // C++20 201606L // C++17 __cpp_lib_optional_range_support 202406L @@ -594,6 +595,8 @@ __cpp_lib_void_t 201411L # define __cpp_lib_mdspan 202406L # undef __cpp_lib_not_fn # define __cpp_lib_not_fn 202306L +# undef __cpp_lib_optional +# define __cpp_lib_optional 202506L # define __cpp_lib_optional_range_support 202406L # undef __cpp_lib_out_ptr # define __cpp_lib_out_ptr 202311L diff --git a/libcxx/modules/std/optional.inc b/libcxx/modules/std/optional.inc index 9ee51117277ce..88de0bb4db12b 100644 --- a/libcxx/modules/std/optional.inc +++ b/libcxx/modules/std/optional.inc @@ -13,8 +13,9 @@ export namespace std { #if _LIBCPP_STD_VER >= 26 // [optional.iterators], iterator support namespace ranges { + using std::ranges::enable_borrowed_range; using std::ranges::enable_view; - } + } // namespace ranges #endif // [optional.nullopt], no-value state indicator using std::nullopt; diff --git a/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp index 3cdd7553e2e5d..b604579e43557 100644 --- a/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp +++ b/libcxx/test/libcxx/utilities/optional/optional.iterator/iterator.compile.pass.cpp @@ -23,8 +23,7 @@ concept has_iterator_aliases = requires { static_assert(has_iterator_aliases>); static_assert(has_iterator_aliases>); - -// TODO: Uncomment these once P2988R12 is implemented, as they would be testing optional - -// static_assert(!has_iterator_aliases>); -// static_assert(!has_iterator_aliases>); +static_assert(has_iterator_aliases>); +static_assert(has_iterator_aliases>); +static_assert(!has_iterator_aliases>); +static_assert(!has_iterator_aliases>); diff --git a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp new file mode 100644 index 0000000000000..25df0dd6c1936 --- /dev/null +++ b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.observe/value_or.compile.pass.cpp @@ -0,0 +1,28 @@ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// template T optional::value_or(U&&); + +#include +#include + +template +concept has_value_or = requires(Opt opt, T&& t) { + { opt.value_or(t) } -> std::same_as; +}; + +static_assert(has_value_or, int>); +static_assert(has_value_or, int&>); +static_assert(has_value_or, const int&>); +static_assert(!has_value_or&&, int (&)[1]>); +static_assert(!has_value_or&&, int (&)()>); diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp index aca6290f5a4bf..c4e652979a4e6 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp @@ -142,8 +142,8 @@ # ifndef __cpp_lib_optional # error "__cpp_lib_optional should be defined in c++26" # endif -# if __cpp_lib_optional != 202110L -# error "__cpp_lib_optional should have the value 202110L in c++26" +# if __cpp_lib_optional != 202506L +# error "__cpp_lib_optional should have the value 202506L in c++26" # endif # ifndef __cpp_lib_optional_range_support diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 8189c5c4e5985..996ec29dce697 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -7509,8 +7509,8 @@ # ifndef __cpp_lib_optional # error "__cpp_lib_optional should be defined in c++26" # endif -# if __cpp_lib_optional != 202110L -# error "__cpp_lib_optional should have the value 202110L in c++26" +# if __cpp_lib_optional != 202506L +# error "__cpp_lib_optional should have the value 202506L in c++26" # endif # ifndef __cpp_lib_optional_range_support diff --git a/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp index df95a8df3793f..81234525923a1 100644 --- a/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.iterator/begin.pass.cpp @@ -21,7 +21,8 @@ template constexpr bool test() { - std::optional opt{T{}}; + std::remove_reference_t t = std::remove_reference_t{}; + std::optional opt{t}; { // begin() is marked noexcept static_assert(noexcept(opt.begin())); @@ -53,6 +54,10 @@ constexpr bool tests() { assert(test()); assert(test()); assert(test()); + assert(test()); + assert(test()); + assert(test()); + assert(test()); return true; } diff --git a/libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp new file mode 100644 index 0000000000000..a79d1d51a5b11 --- /dev/null +++ b/libcxx/test/std/utilities/optional/optional.iterator/borrowed_range.compile.pass.cpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// template class optional::iterator; +// template class optional::const_iterator; +// template +// constexpr bool ranges::enable_borrowed_range> = true; + +#include +#include +#include + +template +void borrowed_range() { + static_assert(std::ranges::enable_borrowed_range>); + static_assert(std::ranges::range> == std::ranges::borrowed_range>); +} + +void test_borrowed_range() { + borrowed_range(); + borrowed_range(); + borrowed_range(); + borrowed_range(); + borrowed_range(); +} diff --git a/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp index 966c3e7441880..c62c9fc7746d6 100644 --- a/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.iterator/end.pass.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include template @@ -41,7 +42,8 @@ constexpr bool test() { assert(it2 == std::as_const(disengaged).end()); } - std::optional engaged{T{}}; + std::remove_reference_t t = std::remove_reference_t{}; + std::optional engaged{t}; { // end() != begin() if the optional is engaged auto it = engaged.end(); @@ -62,6 +64,10 @@ constexpr bool tests() { assert(test()); assert(test()); assert(test()); + assert(test()); + assert(test()); + assert(test()); + assert(test()); return true; } diff --git a/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp b/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp index 1203290a0290a..671fac35e732a 100644 --- a/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.iterator/iterator.pass.cpp @@ -14,15 +14,23 @@ // template class optional::const_iterator; #include -#include #include #include #include #include -template +template +constexpr bool test_range_concept() { + return std::ranges::range>; +} + +template __val> constexpr bool test() { - std::optional opt{__val}; + std::remove_reference_t v{__val}; + std::optional opt{v}; + { + assert(test_range_concept()); + } { // Dereferencing an iterator of an engaged optional will return the same value that the optional holds. auto it = opt.begin(); @@ -41,13 +49,14 @@ constexpr bool test() { assert(std::random_access_iterator); } - { // const_iterator::value_type == std::remove_cv_t, const_iterator::reference == const T&, iterator::value_type = std::remove_cv_t, iterator::reference == T& + { // const_iterator::value_type == std::remove_cvref_t, const_iterator::reference == const T&, iterator::value_type = std::remove_cvref_t, iterator::reference == T& + // std::remove_cv_t is impossible for optional auto it = opt.begin(); auto it2 = std::as_const(opt).begin(); - assert((std::is_same_v>)); - assert((std::is_same_v)); - assert((std::is_same_v>)); - assert((std::is_same_v)); + assert((std::is_same_v>)); + assert((std::is_same_v&>)); + assert((std::is_same_v>)); + assert((std::is_same_v&>)); } { // std::ranges::size for an engaged optional == 1, disengaged optional == 0 @@ -68,13 +77,13 @@ constexpr bool test() { // An optional with value that is reset will have a begin() == end(), then when it is reassigned a value, // begin() != end(), and *begin() will contain the new value. 
{ - std::optional val{__val}; + std::optional val{v}; assert(val.begin() != val.end()); val.reset(); assert(val.begin() == val.end()); - val.emplace(__val); + val.emplace(v); assert(val.begin() != val.end()); - assert(*(val.begin()) == __val); + assert(*(val.begin()) == v); } return true; @@ -86,6 +95,15 @@ constexpr bool tests() { assert((test())); assert((test())); assert((test())); + assert((test())); + assert((test())); + assert((test())); + assert((test())); + assert((test())); + + assert(!test_range_concept()); + assert(!test_range_concept()); + assert(!test_range_concept()); return true; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp index 97305d976e066..133eed4a606bb 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/and_then.pass.cpp @@ -16,6 +16,7 @@ // template constexpr auto and_then(F&&) const&&; #include +#include #include #include "test_macros.h" @@ -257,8 +258,94 @@ constexpr bool test() { return true; } +#if TEST_STD_VER >= 26 +constexpr bool test_ref() { + // Test & overload + { + // Without & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + std::same_as> decltype(auto) r = i.and_then(LVal{}); + + assert(r == 1); + assert(i.and_then(NOLVal{}) == std::nullopt); + } + + //With & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + RefQual l{}; + NORefQual nl{}; + std::same_as> decltype(auto) r = i.and_then(l); + + assert(r == 1); + assert(i.and_then(nl) == std::nullopt); + } + } + + // Test const& overload + { + // Without & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + std::same_as> decltype(auto) r = i.and_then(CLVal{}); + + assert(r == 1); + assert(i.and_then(NOCLVal{}) == std::nullopt); + } + + //With & qualifier on F's operator() + { + int j = 42; + const std::optional i{j}; + const CRefQual l{}; + const NOCRefQual nl{}; + std::same_as> decltype(auto) r = i.and_then(l); + + assert(r == 1); + assert(i.and_then(nl) == std::nullopt); + } + } + // Test && overload + { + //With & qualifier on F's operator() + { + int j = 42; + std::optional i{j}; + std::same_as> decltype(auto) r = i.and_then(RVRefQual{}); + + assert(r == 1); + assert(i.and_then(NORVRefQual{}) == std::nullopt); + } + } + + // Test const&& overload + { + //With & qualifier on F's operator() + { + int j = 42; + const std::optional i{j}; + const RVCRefQual l{}; + const NORVCRefQual nl{}; + std::same_as> decltype(auto) r = i.and_then(std::move(l)); + + assert(r == 1); + assert(i.and_then(std::move(nl)) == std::nullopt); + } + } + return true; +} +#endif + int main(int, char**) { test(); static_assert(test()); +#if TEST_STD_VER >= 26 + test_ref(); + static_assert(test_ref()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp index ccc94ab9be2cb..de0a67c1579ee 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/or_else.pass.cpp @@ -62,6 +62,32 @@ constexpr bool test() { return std::optional{}; }); } +#if TEST_STD_VER >= 26 + { + int i = 2; + std::optional opt; + assert(opt.or_else([&] { return std::optional{i}; }) == i); + int j = 3; + opt = j; + opt.or_else([] { + assert(false); + return std::optional{}; + }); + assert(opt == j); + } + { + int i = 
2; + std::optional opt; + assert(std::move(opt).or_else([&] { return std::optional{i}; }) == i); + int j = 3; + opt = j; + std::move(opt).or_else([] { + assert(false); + return std::optional{}; + }); + assert(opt == j); + } +#endif return true; } diff --git a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp index 0a151517b101c..ad2713f2ac5b8 100644 --- a/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.monadic/transform.pass.cpp @@ -17,62 +17,64 @@ #include "test_macros.h" #include +#include #include #include +#include struct LVal { constexpr int operator()(int&) { return 1; } - int operator()(const int&) = delete; - int operator()(int&&) = delete; + int operator()(const int&) = delete; + int operator()(int&&) = delete; int operator()(const int&&) = delete; }; struct CLVal { int operator()(int&) = delete; constexpr int operator()(const int&) { return 1; } - int operator()(int&&) = delete; + int operator()(int&&) = delete; int operator()(const int&&) = delete; }; struct RVal { - int operator()(int&) = delete; + int operator()(int&) = delete; int operator()(const int&) = delete; constexpr int operator()(int&&) { return 1; } int operator()(const int&&) = delete; }; struct CRVal { - int operator()(int&) = delete; + int operator()(int&) = delete; int operator()(const int&) = delete; - int operator()(int&&) = delete; + int operator()(int&&) = delete; constexpr int operator()(const int&&) { return 1; } }; struct RefQual { constexpr int operator()(int) & { return 1; } - int operator()(int) const& = delete; - int operator()(int) && = delete; + int operator()(int) const& = delete; + int operator()(int) && = delete; int operator()(int) const&& = delete; }; struct CRefQual { int operator()(int) & = delete; constexpr int operator()(int) const& { return 1; } - int operator()(int) && = delete; + int operator()(int) && = delete; int operator()(int) const&& = delete; }; struct RVRefQual { - int operator()(int) & = delete; + int operator()(int) & = delete; int operator()(int) const& = delete; constexpr int operator()(int) && { return 1; } int operator()(int) const&& = delete; }; struct RVCRefQual { - int operator()(int) & = delete; + int operator()(int) & = delete; int operator()(int) const& = delete; - int operator()(int) && = delete; + int operator()(int) && = delete; constexpr int operator()(int) const&& { return 1; } }; @@ -83,7 +85,7 @@ struct NoCopy { }; struct NoMove { - NoMove() = default; + NoMove() = default; NoMove(NoMove&&) = delete; NoMove operator()(const NoCopy&&) { return NoMove{}; } }; @@ -200,8 +202,111 @@ constexpr bool test() { return true; } +#if TEST_STD_VER >= 26 +constexpr bool test_ref() { + { + std::optional opt1; + std::same_as> decltype(auto) opt1r = opt1.transform([](int i) { return i + 2; }); + assert(!opt1); + assert(!opt1r); + } + + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o2 = opt.transform([](int j) { return j + 2; }); + + assert(*o2 == 44); + } + // Test & overload + { + // Without & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = opt.transform(LVal{}); + + assert(*o3 == 1); + } + + //With & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + RefQual l{}; + std::same_as> decltype(auto) o3 = opt.transform(l); + + assert(*o3 == 1); + } + } + // const& overload + { + // Without & qualifier on F's 
operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = std::as_const(opt).transform(CLVal{}); + + assert(*o3 == 1); + } + + //With & qualifier on F's operator() + { + int i = 42; + const std::optional opt{i}; + const CRefQual l{}; + std::same_as> decltype(auto) o3 = opt.transform(l); + + assert(*o3 == 1); + } + } + + // Test && overload + { + // Without & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = std::move(opt).transform(RVal{}); + + assert(*o3 == 1); + } + + //With & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + std::same_as> decltype(auto) o3 = std::move(opt).transform(RVRefQual{}); + assert(*o3 == 1); + } + } + + // const&& overload + { + //With & qualifier on F's operator() + { + int i = 42; + std::optional opt{i}; + const RVCRefQual rvc{}; + std::same_as> decltype(auto) o3 = opt.transform(std::move(rvc)); + assert(*o3 == 1); + } + } + { + std::optional o6 = std::nullopt; + auto o6r = o6.transform([](int) { return 42; }); + assert(!o6r); + } + return true; +} +#endif + int main(int, char**) { test(); static_assert(test()); +#if TEST_STD_VER >= 26 + test_ref(); + static_assert(test_ref()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp index eaca111b72dca..ddb9ffc4bf80c 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/assign_value.pass.cpp @@ -250,6 +250,57 @@ constexpr T pr38638(T v) return *o + 2; } +#if TEST_STD_VER >= 26 + +template _Val> +constexpr void test_with_ref() { + T t{_Val}; + { // to empty + optional opt; + opt = t; + assert(static_cast(opt) == true); + assert(*opt == t); + } + { // to existing + optional opt{t}; + opt = t; + assert(static_cast(opt) == true); + assert(*opt == t); + } + { // test default argument + optional opt; + opt = {t}; + assert(static_cast(opt) == true); + assert(*opt == t); + } + { // test default argument + optional opt{t}; + opt = {}; + assert(static_cast(opt) == false); + } + // test two objects, make sure that the optional only changes what it holds a reference to + { + T t2{_Val}; + optional opt{t}; + opt = t2; + + assert(std::addressof(*opt) != std::addressof(t)); + assert(std::addressof(*opt) == std::addressof(t2)); + } + // test that reassigning the reference for an optional doesn't affect the objet it's holding a reference to + { + int i = -1; + int j = 2; + optional opt{i}; + opt = j; + + assert(i == -1); + assert(std::addressof(*opt) != std::addressof(i)); + assert(std::addressof(*opt) == std::addressof(j)); + assert(*opt == 2); + } +} +#endif int main(int, char**) { @@ -281,5 +332,8 @@ int main(int, char**) static_assert(pr38638(3) == 5, ""); - return 0; +#if TEST_STD_VER >= 26 + test_with_ref(); +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp index 245d8ff3d2146..629e315add4d9 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace.pass.cpp @@ -221,6 +221,24 @@ TEST_CONSTEXPR_CXX20 bool 
test_empty_emplace() { return true; } +#if TEST_STD_VER >= 26 +template _Val> +constexpr bool test_ref() { + using Opt = std::optional; + T t{_Val}; + { + Opt opt; + auto& v = opt.emplace(t); + static_assert(std::is_same_v); + assert(static_cast(opt) == true); + assert(*opt == t); + assert(&v == &*opt); + assert(&t == &*opt); + } + return true; +} +#endif + int main(int, char**) { { @@ -291,6 +309,11 @@ int main(int, char**) } } #endif - - return 0; +#if TEST_STD_VER >= 26 + static_assert(test_ref()); + static_assert(test_ref()); + assert((test_ref())); + assert((test_ref())); +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp index 775d2bde7d13d..c5281783d4350 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ctor.verify.cpp @@ -23,18 +23,26 @@ struct NonDestructible { ~NonDestructible() = delete; }; int main(int, char**) { - { - std::optional o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} - std::optional o2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a non-destructible type is ill-formed}} - std::optional o3; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an array type is ill-formed}} - } - - { + { +#if TEST_STD_VER >= 26 + std::optional + opt2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an rvalue reference type is ill-formed}} +#else + std::optional + o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} +#endif + std::optional + o2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a non-destructible type is ill-formed}} + std::optional + o3; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an array type is ill-formed}} + } + + { std::optional< std::in_place_t> o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} std::optional o2; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} std::optional< volatile std::in_place_t> o3; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} std::optional o4; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with in_place_t is ill-formed}} - } + } { std::optional< std::nullopt_t> o1; // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with nullopt_t is ill-formed}} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp index f856c1d41d05a..f59fc3b82ad7f 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp @@ -78,71 +78,71 @@ void test_ref(InitArgs&&... 
args) assert(&(*lhs) == &(*rhs)); } -void test_reference_extension() -{ -#if defined(_LIBCPP_VERSION) && 0 // FIXME these extensions are currently disabled. - using T = TestTypes::TestType; - T::reset(); - { - T t; - T::reset_constructors(); - test_ref(); - test_ref(t); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::destroyed == 1); - assert(T::alive == 0); - { - T t; - const T& ct = t; - T::reset_constructors(); - test_ref(); - test_ref(t); - test_ref(ct); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::alive == 0); - assert(T::destroyed == 1); - { - T t; - T::reset_constructors(); - test_ref(); - test_ref(std::move(t)); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::alive == 0); - assert(T::destroyed == 1); - { - T t; - const T& ct = t; - T::reset_constructors(); - test_ref(); - test_ref(std::move(t)); - test_ref(std::move(ct)); - assert(T::alive == 1); - assert(T::constructed == 0); - assert(T::assigned == 0); - assert(T::destroyed == 0); - } - assert(T::alive == 0); - assert(T::destroyed == 1); - { - static_assert(!std::is_copy_constructible>::value, ""); - static_assert(!std::is_copy_constructible>::value, ""); - } +void test_reference_extension() { +#if TEST_STD_VER >= 26 + using T = TestTypes::TestType; + T::reset(); + { + T t; + T::reset_constructors(); + test_ref(); + test_ref(t); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::destroyed == 1); + assert(T::alive == 0); + { + T t; + const T& ct = t; + T::reset_constructors(); + test_ref(); + test_ref(t); + test_ref(ct); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::alive == 0); + assert(T::destroyed == 1); +# if 0 // FIXME: optional is not allowed. + { + T t; + T::reset_constructors(); + test_ref(); + test_ref(std::move(t)); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::alive == 0); + assert(T::destroyed == 1); + { + T t; + const T& ct = t; + T::reset_constructors(); + test_ref(); + test_ref(std::move(t)); + test_ref(std::move(ct)); + assert(T::alive == 1); + assert(T::constructed == 0); + assert(T::assigned == 0); + assert(T::destroyed == 0); + } + assert(T::alive == 0); + assert(T::destroyed == 1); + { + static_assert(!std::is_copy_constructible_v>); + static_assert(!std::is_copy_constructible_v>); + } +# endif #endif } - int main(int, char**) { test(); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp new file mode 100644 index 0000000000000..01b241ffbe79b --- /dev/null +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_constructs_from_temporary.verify.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// optional + +#include +#include + +struct X { + int i; + + X(int j) : i(j) {} +}; + +int main(int, char**) { + const std::optional _co(1); + std::optional _o(1); + + // expected-error-re@*:* 8 {{call to deleted constructor of 'std::optional<{{.*}}>'}} + std::optional o1{1}; // optional(U&&) + std::optional o2{std::optional(1)}; // optional(optional&&) + std::optional o3{_co}; // optional(const optional&) + std::optional o4{_o}; // optional(optional&) + std::optional o5{1}; // optional(U&&) + std::optional o6{std::optional(1)}; // optional(optional&&) + std::optional o7{_co}; // optional(const optional&) + std::optional o8{_o}; // optional(optional&) +} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp new file mode 100644 index 0000000000000..57552743af138 --- /dev/null +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/ref_t.pass.cpp @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +#include +#include +#include +#include + +template _Val> +constexpr bool test() { + std::remove_reference_t item{_Val}; + std::optional opt{item}; + + { + assert(*opt == item); + assert(&(*opt) == &item); + } + { + assert(*std::as_const(opt) == item); + assert(&(*std::as_const(opt)) == &item); + } + + return true; +} + +template +constexpr T foo(T val) { + return val; +} + +template +constexpr bool fn_ref_test() { + std::optional opt{foo}; + assert(opt.has_value()); + assert((*opt)(_Val) == _Val); + + return true; +} + +template +constexpr bool array_ref_test() { + T arr[5]{}; + std::optional opt{arr}; + + assert(opt.has_value()); + (*opt)[0] = _Val; + assert((*opt)[0] == _Val); + assert(arr[0] == _Val); + + return true; +} + +constexpr bool tests() { + assert((test())); + assert((test())); + assert((fn_ref_test())); + assert((array_ref_test())); + assert((fn_ref_test())); + assert((array_ref_test())); + return true; +} + +int main(int, char**) { + static_assert(tests()); + tests(); +} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp index c0044276ea9ad..1202879036f56 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp @@ -11,9 +11,9 @@ // ~optional(); +#include #include #include -#include #include "test_macros.h" @@ -64,6 +64,24 @@ int main(int, char**) } assert(X::dtor_called == true); } +#if TEST_STD_VER >= 26 + { + typedef X& T; + static_assert(std::is_trivially_destructible_v); + static_assert(std::is_trivially_destructible_v>); + } + X::dtor_called = false; + X x; + { + optional opt{x}; + assert(X::dtor_called == false); + } + assert(X::dtor_called == false); - return 0; + { + 
static_assert(std::is_trivially_destructible_v); + static_assert(std::is_trivially_destructible_v>); + } +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp index 7029b37cbecd7..e23e481f6a05d 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp @@ -69,5 +69,16 @@ int main(int, char**) X::dtor_called = false; } - return 0; +#if TEST_STD_VER >= 26 + { + X x{}; + optional opt(x); + X::dtor_called = false; + opt.reset(); + assert(X::dtor_called == false); + assert(static_cast(opt) == false); + } +#endif + + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp index 49b4d21a28066..6c1bf8aa15a8d 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp @@ -50,7 +50,19 @@ int main(int, char**) optional opt(X{}); assert((*opt).test() == 4); } +#if TEST_STD_VER >= 26 + { + X x{}; + optional opt(x); + ASSERT_SAME_TYPE(decltype(*opt), X&); + ASSERT_NOEXCEPT(*opt); + } + { + X x{}; + optional opt(x); + assert((*opt).test() == 4); + } +#endif static_assert(test() == 7, ""); - return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp index ff86d9534faf6..c15d4e4af74cc 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp @@ -43,6 +43,25 @@ int main(int, char**) constexpr optional opt(X{}); static_assert((*opt).test() == 3, ""); } +#if TEST_STD_VER >= 26 + { + X x{}; + const optional opt{x}; + ASSERT_SAME_TYPE(decltype(*opt), X&); + ASSERT_NOEXCEPT(*opt); + } + { + X x{}; + const optional opt{x}; + ASSERT_SAME_TYPE(decltype(*opt), const X&); + ASSERT_NOEXCEPT(*opt); + } + { + static constexpr X x{}; + constexpr optional opt(x); + static_assert((*opt).test() == 3); + } +#endif { constexpr optional opt(Y{}); assert((*opt).test() == 2); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp index 6998e023022c5..9873a767cfbe6 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/has_value.pass.cpp @@ -33,6 +33,13 @@ int main(int, char**) constexpr optional opt(0); static_assert(opt.has_value(), ""); } +#if TEST_STD_VER >= 26 + { + static constexpr int i = 0; + constexpr optional opt{i}; + static_assert(opt.has_value()); + } +#endif - return 0; + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp index 2b5fba546ef42..96d22743ac7fe 
100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow.pass.cpp @@ -19,9 +19,9 @@ using std::optional; -struct X -{ - int test() noexcept {return 3;} +struct X { + int test() noexcept { return 3; } + int test() const noexcept { return 3; } }; struct Y @@ -47,6 +47,30 @@ int main(int, char**) optional opt(X{}); assert(opt->test() == 3); } +#if TEST_STD_VER >= 26 + { + X x{}; + std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + X x{}; + std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), const X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + X x{}; + optional opt{x}; + assert(opt->test() == 3); + } + { + X x{}; + optional opt{x}; + assert(opt->test() == 3); + } +#endif { static_assert(test() == 3, ""); } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp index d8ce932bd7810..e9694fd6d9640 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/op_arrow_const.pass.cpp @@ -54,6 +54,25 @@ int main(int, char**) constexpr optional opt(Z{}); static_assert(opt->test() == 1, ""); } +#if TEST_STD_VER >= 26 + { + X x{}; + const std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + X x{}; + const std::optional opt(x); + ASSERT_SAME_TYPE(decltype(opt.operator->()), const X*); + ASSERT_NOEXCEPT(opt.operator->()); + } + { + static constexpr Z z{}; + constexpr optional opt(z); + static_assert(opt->test() == 1); + } +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp index 781784c6806a4..22b74f5512d53 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp @@ -56,6 +56,14 @@ int main(int, char**) opt.emplace(); assert(opt.value().test() == 4); } +#if TEST_STD_VER >= 26 + { + X x; + optional opt{x}; + ASSERT_NOT_NOEXCEPT(opt.value()); + ASSERT_SAME_TYPE(decltype(opt.value()), X&); + } +#endif #ifndef TEST_HAS_NO_EXCEPTIONS { optional opt; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp index 8c063ae1a799c..66890ff9c9b91 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or.pass.cpp @@ -80,6 +80,14 @@ constexpr int test() assert((std::move(opt).value_or({2, 3}) == Z{2, 3})); assert(!opt); } +#if TEST_STD_VER >= 26 + { + int y = 2; + optional opt; + assert(std::move(opt).value_or(y) == 2); + assert(!opt); + } +#endif return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp 
b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp index ec42890a3b995..6bd308b405605 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp @@ -79,6 +79,12 @@ int main(int, char**) const optional opt; assert(opt.value_or({Y(3)}) == 4); } - - return 0; +#if TEST_STD_VER >= 26 + { + X y{3}; + const optional opt; + assert(opt.value_or(y) == 3); + } +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp index e3a2fdb8b0020..a82ca615e0c8c 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.swap/swap.pass.cpp @@ -13,9 +13,10 @@ // noexcept(is_nothrow_move_constructible::value && // is_nothrow_swappable::value) +#include +#include #include #include -#include #include "test_macros.h" #include "archetypes.h" @@ -127,6 +128,74 @@ TEST_CONSTEXPR_CXX20 bool check_swap() return true; } +#if TEST_STD_VER >= 26 +template +constexpr bool check_swap_ref() { + { + optional opt1; + optional opt2; + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + opt1.swap(opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + } + + { + T one{1}; + optional opt1(one); + optional opt2; + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == true); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == false); + opt1.swap(opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(std::addressof(*opt2) == std::addressof(one)); + } + + { + T two{2}; + optional opt1; + optional opt2(two); + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(std::addressof(*opt2) == std::addressof(two)); + opt1.swap(opt2); + assert(static_cast(opt1) == true); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == false); + } + + { + T one{1}; + T two{2}; + + optional opt1(one); + optional opt2(two); + static_assert(noexcept(opt1.swap(opt2)) == true); + assert(static_cast(opt1) == true); + assert(*opt1 == 1); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == true); + assert(*opt2 == 2); + assert(std::addressof(*opt2) == std::addressof(two)); + opt1.swap(opt2); + assert(static_cast(opt1) == true); + assert(*opt1 == 2); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == true); + assert(*opt2 == 1); + assert(std::addressof(*opt2) == std::addressof(one)); + } + + return true; +} +#endif + int main(int, char**) { check_swap(); @@ -134,6 +203,12 @@ int main(int, char**) #if TEST_STD_VER > 17 static_assert(check_swap()); static_assert(check_swap()); +#endif +#if TEST_STD_VER >= 26 + static_assert(check_swap_ref()); + static_assert(check_swap_ref()); + check_swap_ref(); + check_swap_ref(); #endif { optional opt1; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp 
b/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp index a96c3c648f939..a956ab3a219cf 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional_requires_destructible_object.verify.cpp @@ -13,6 +13,8 @@ #include +#include "test_macros.h" + using std::optional; struct X @@ -25,9 +27,13 @@ int main(int, char**) { using std::optional; { - // expected-error-re@optional:* 2 {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} - optional opt1; - optional opt2; +#if TEST_STD_VER >= 26 + // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with an rvalue reference type is ill-formed}} +#else + // expected-error-re@optional:* 2 {{static assertion failed{{.*}}instantiation of optional with a reference type is ill-formed}} +#endif + optional opt1; + optional opt2; } { // expected-error-re@optional:* {{static assertion failed{{.*}}instantiation of optional with a non-destructible type is ill-formed}} diff --git a/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp index d097559877267..ecbc6b4548ee6 100644 --- a/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp @@ -36,6 +36,11 @@ int main(int, char**) test, const int>(); test, double>(); test, const double>(); - - return 0; +#if TEST_STD_VER >= 26 + test, int>(); + test, const int>(); + test, double>(); + test, const double>(); +#endif + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp index e325a7af558eb..c27645165d20e 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp @@ -13,10 +13,10 @@ // template // constexpr optional> make_optional(T&& v); +#include +#include #include #include -#include -#include #include "test_macros.h" diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp index 23f131d2fc499..5dd1d6f0b3380 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp @@ -15,13 +15,30 @@ // GCC crashes on this file, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120577 // XFAIL: gcc-15 +#include +#include #include #include -#include -#include +#include #include "test_macros.h" +template +constexpr bool test_ref() { + T i{0}; + auto opt = std::make_optional(i); + +#if TEST_STD_VER < 26 + assert((std::is_same_v>)); +#else + assert((std::is_same_v>)); +#endif + + assert(*opt == 0); + + return true; +} + int main(int, char**) { { @@ -43,6 +60,12 @@ int main(int, char**) auto opt = std::make_optional(4u, 'X'); assert(*opt == "XXXX"); } + using namespace std::string_view_literals; + + static_assert(test_ref()); + assert((test_ref())); + static_assert(test_ref()); + assert((test_ref())); - return 0; + return 0; } diff --git a/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp 
b/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp index 0da3a821e7961..c757120a1c146 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/swap.pass.cpp @@ -12,9 +12,10 @@ // template void swap(optional& x, optional& y) // noexcept(noexcept(x.swap(y))); +#include +#include #include #include -#include #include "test_macros.h" #include "archetypes.h" @@ -109,9 +110,82 @@ void test_swap_sfinae() { } } +#if TEST_STD_VER >= 26 +template +constexpr bool test_swap_ref() { + { + optional opt1; + optional opt2; + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + swap(opt1, opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == false); + } + { + T one{1}; + optional opt1(one); + optional opt2; + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == true); + assert(*opt1 == 1); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == false); + swap(opt1, opt2); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(*opt2 == 1); + assert(std::addressof(*opt2) == std::addressof(one)); + } + { + T two{2}; + optional opt1; + optional opt2(two); + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == false); + assert(static_cast(opt2) == true); + assert(*opt2 == 2); + assert(std::addressof(*opt2) == std::addressof(two)); + swap(opt1, opt2); + assert(static_cast(opt1) == true); + assert(*opt1 == 2); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == false); + } + { + T one{1}; + T two{2}; + optional opt1(one); + optional opt2(two); + static_assert(noexcept(swap(opt1, opt2)) == true); + assert(static_cast(opt1) == true); + assert(*opt1 == 1); + assert(std::addressof(*opt1) == std::addressof(one)); + assert(static_cast(opt2) == true); + assert(*opt2 == 2); + assert(std::addressof(*opt2) == std::addressof(two)); + swap(opt1, opt2); + assert(static_cast(opt1) == true); + assert(*opt1 == 2); + assert(std::addressof(*opt1) == std::addressof(two)); + assert(static_cast(opt2) == true); + assert(*opt2 == 1); + assert(std::addressof(*opt2) == std::addressof(one)); + } + return true; +} +#endif + int main(int, char**) { test_swap_sfinae(); +#if TEST_STD_VER >= 26 + static_assert(test_swap_ref()); + static_assert(test_swap_ref()); + test_swap_ref(); + test_swap_ref(); +#endif { optional opt1; optional opt2; diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 82a1785a0c906..0802f865f9406 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -1017,6 +1017,7 @@ def add_version_header(tc): "c++17": 201606, "c++20": 202106, # P2231R1 Missing constexpr in std::optional and std::variant "c++23": 202110, # P0798R8 Monadic operations for std::optional + LWG3621 Remove feature-test macro __cpp_lib_monadic_optional + "c++26": 202506, # P2988R12: std::optional }, "headers": ["optional"], }, From 175e3becbf8468213034679bf749e3c0c4e0bbda Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Tue, 11 Nov 2025 19:00:39 -0800 Subject: [PATCH 23/32] [MLIR][Python] Add region_op wrappers for linalg (#167616) Makes linalg.reduce and linalg.map region_ops so they can be constructed from functions and be called as decorators. 
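For illustration, a condensed sketch of the decorator form, lifted from the testReduceOp case added to mlir/test/python/dialects/linalg/ops.py in this patch; `f32`, `input`, `init`, `single_result`, and `dims` are assumed to be built by that test's surrounding module/func setup, and nothing beyond the wrappers added here is used:

```python
# The decorated function becomes the region body of the linalg.reduce;
# region_op(ReduceOp, terminator=YieldOp) wraps the returned value in the
# linalg.yield terminator.
@linalg.reduce(
    result=[single_result],
    inputs=[input],
    inits=[init],
    dimensions=dims,
)
def reduced(element: f32, acc: f32):
    return arith.mulf(acc, element)

# `reduced` can then be used wherever the reduction result is needed,
# e.g. tensor.extract(reduced, []) as in the test below.
```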
--- mlir/python/mlir/dialects/linalg/__init__.py | 4 ++ mlir/test/python/dialects/linalg/ops.py | 76 +++++++++++++++++++- 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/mlir/python/mlir/dialects/linalg/__init__.py b/mlir/python/mlir/dialects/linalg/__init__.py index d387c12deeed9..c92bda74c12bf 100644 --- a/mlir/python/mlir/dialects/linalg/__init__.py +++ b/mlir/python/mlir/dialects/linalg/__init__.py @@ -352,3 +352,7 @@ def unpack( ip=ip, ) ) + + +reduce = region_op(ReduceOp, terminator=YieldOp) +map = region_op(MapOp, terminator=YieldOp) diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py index 709a1d2424f35..92591cd59fb40 100644 --- a/mlir/test/python/dialects/linalg/ops.py +++ b/mlir/test/python/dialects/linalg/ops.py @@ -1,7 +1,8 @@ # RUN: %PYTHON %s | FileCheck %s -from mlir.dialects import arith, func, linalg, tensor, memref +from mlir.dialects import arith, func, linalg, tensor, memref, builtin from mlir.dialects.linalg.opdsl.lang import * +from mlir.extras import types as T from mlir.ir import * @@ -857,3 +858,76 @@ def elementwise_op( ) print(module) + + +@run +def testReduceOp(): + with Context(), Location.unknown(): + f32 = T.f32() + tensor_type = T.tensor(10, f32) + + @builtin.module + def module(): + @func.func(tensor_type) + def reduce_op(input): + c1 = arith.constant(f32, 1.0) + single_result = ir.RankedTensorType.get((), f32) + dims = ir.DenseI64ArrayAttr.get([0]) + init = tensor.splat(single_result, c1, []) + + @linalg.reduce( + result=[single_result], + inputs=[input], + inits=[init], + dimensions=dims, + ) + def reduced(element: f32, acc: f32): + return arith.mulf(acc, element) + + return tensor.extract(reduced, []) + + print(module) + + +# CHECK-LABEL: func.func @reduce_op( +# CHECK-SAME: %[[ARG0:.*]]: tensor<10xf32>) -> f32 { +# CHECK: %[[CONSTANT_0:.*]] = arith.constant 1.000000e+00 : f32 +# CHECK: %[[SPLAT_0:.*]] = tensor.splat %[[CONSTANT_0]] : tensor +# CHECK: %[[REDUCE_0:.*]] = linalg.reduce { arith.mulf } ins(%[[ARG0]] : tensor<10xf32>) outs(%[[SPLAT_0]] : tensor) dimensions = [0] +# CHECK: %[[EXTRACT_0:.*]] = tensor.extract %[[REDUCE_0]][] : tensor +# CHECK: return %[[EXTRACT_0]] : f32 +# CHECK: } + + +@run +def testMapOp(): + with Context(), Location.unknown(): + f32 = T.f32() + tensor_type = T.tensor(10, f32) + + @builtin.module + def module(): + @func.func(tensor_type) + def map_op(input): + empty = tensor.empty(tensor_type.shape, f32) + + @linalg.map( + result=[tensor_type], + inputs=[input, input], + init=empty, + ) + def add(element: f32, acc: f32, init: f32): + return arith.addf(element, acc) + + return add + + module.verify() + print(module) + + +# CHECK-LABEL: func.func @map_op( +# CHECK-SAME: %[[ARG0:.*]]: tensor<10xf32>) -> tensor<10xf32> { +# CHECK: %[[EMPTY_0:.*]] = tensor.empty() : tensor<10xf32> +# CHECK: %[[MAP_0:.*]] = linalg.map { arith.addf } ins(%[[ARG0]], %[[ARG0]] : tensor<10xf32>, tensor<10xf32>) outs(%[[EMPTY_0]] : tensor<10xf32>) +# CHECK: return %[[MAP_0]] : tensor<10xf32> +# CHECK: } From 905ee4424d62f80a45f26ac03e29adf3bb7a6c85 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 12 Nov 2025 11:13:38 +0800 Subject: [PATCH 24/32] [NFC] [C++20] [Modules] Test that we can avoid adding more specializations in reduced BMI --- ...-specialization-update-in-reduced-bmi.cppm | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm diff --git 
a/clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm b/clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm new file mode 100644 index 0000000000000..7844344a15427 --- /dev/null +++ b/clang/test/Modules/avoid-specialization-update-in-reduced-bmi.cppm @@ -0,0 +1,28 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/base.cppm -emit-module-interface -o %t/base.pcm +// RUN: %clang_cc1 -std=c++20 %t/update.cppm -fmodule-file=base=%t/base.pcm -emit-module-interface -o %t/update.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/update.pcm | FileCheck %t/update.cppm --check-prefix=FULL +// +// RUN: %clang_cc1 -std=c++20 %t/base.cppm -emit-reduced-module-interface -o %t/base.pcm +// RUN: %clang_cc1 -std=c++20 %t/update.cppm -fmodule-file=base=%t/base.pcm -emit-reduced-module-interface -o %t/update.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/update.pcm | FileCheck %t/update.cppm + +//--- base.cppm +export module base; + +export template +struct base { + T value; +}; + +//--- update.cppm +export module update; +import base; +export int update() { + return base().value; +} + +// FULL: TEMPLATE_SPECIALIZATION +// CHECK-NOT: TEMPLATE_SPECIALIZATION From 1f38d49ebe96417e368a567efa4d650b8a9ac30f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 11 Nov 2025 19:49:53 -0800 Subject: [PATCH 25/32] ValueMapper: Delete unused initializers of replaced appending globals. A full LTO link time performance and memory regression was introduced by #137081 in cases where the modules contain large quantities of llvm.used globals. This was unnoticed because it was not expected that this would be a typical case, but this is exactly what coverage collection does, and when this feature is enabled together with full LTO we end up with quadratic memory consumption (from the unused constants) and quadratic complexity in the function Verifier::visitGlobalValue (which visits all the unused constants in the use list of each global value). This is a targeted fix that avoids reintroducing the quadratic complexity from before #137081, by having ValueMapper delete the old initializer of an appending global if it is unused, instead of visiting every global in the context after every link. The repro-cfi-64 reproducer from #167037 before and after this change: ``` Elapsed time Max RSS (KB) Before 12:05.11 52537184 After 3:27.68 7520696 ``` Fixes #167037. 
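For reference, the heart of the change in Mapper::mapAppendingVariable, condensed from the ValueMapper.cpp hunk below (the SmallVector/cast template arguments, which this rendering of the diff drops, are filled in to match the surrounding file):

```cpp
// Copy the elements of the old appending global's initializer, then detach
// and destroy the now-unused initializer so it no longer sits on the use
// lists of the globals it references.
Constant *InitPrefix =
    (OldGV && !OldGV->isDeclaration()) ? OldGV->getInitializer() : nullptr;

SmallVector<Constant *, 16> Elements;
if (InitPrefix) {
  unsigned NumElements =
      cast<ArrayType>(InitPrefix->getType())->getNumElements();
  for (unsigned I = 0; I != NumElements; ++I)
    Elements.push_back(InitPrefix->getAggregateElement(I));
  OldGV->setInitializer(nullptr);
  if (InitPrefix->hasUseList() && InitPrefix->use_empty())
    InitPrefix->destroyConstant();
}
```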
Reviewers: nikic, teresajohnson Reviewed By: teresajohnson Pull Request: https://github.com/llvm/llvm-project/pull/167629 --- .../llvm/Transforms/Utils/ValueMapper.h | 2 +- llvm/lib/Linker/IRMover.cpp | 5 +--- llvm/lib/Transforms/Utils/ValueMapper.cpp | 24 ++++++++++++------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h index 17b5d4b891230..28c4ae840b29f 100644 --- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h +++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h @@ -204,7 +204,7 @@ class ValueMapper { LLVM_ABI void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, unsigned MappingContextID = 0); LLVM_ABI void scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, + GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MappingContextID = 0); diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index f78d9b016d8c9..f215f39f41bfb 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -882,10 +882,7 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV, NG->copyAttributesFrom(SrcGV); forceRenaming(NG, SrcGV->getName()); - Mapper.scheduleMapAppendingVariable( - *NG, - (DstGV && !DstGV->isDeclaration()) ? DstGV->getInitializer() : nullptr, - IsOldStructor, SrcElements); + Mapper.scheduleMapAppendingVariable(*NG, DstGV, IsOldStructor, SrcElements); // Replace any uses of the two global variables with uses of the new // global. diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp index 8d8a60b6918fe..9021d8b289baf 100644 --- a/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -77,7 +77,7 @@ struct WorklistEntry { }; struct AppendingGVTy { GlobalVariable *GV; - Constant *InitPrefix; + GlobalVariable *OldGV; }; struct AliasOrIFuncTy { GlobalValue *GV; @@ -162,7 +162,7 @@ class Mapper { void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, unsigned MCID); - void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + void scheduleMapAppendingVariable(GlobalVariable &GV, GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID); @@ -173,7 +173,7 @@ class Mapper { void flush(); private: - void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, + void mapAppendingVariable(GlobalVariable &GV, GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers); @@ -944,7 +944,7 @@ void Mapper::flush() { drop_begin(AppendingInits, PrefixSize)); AppendingInits.resize(PrefixSize); mapAppendingVariable(*E.Data.AppendingGV.GV, - E.Data.AppendingGV.InitPrefix, + E.Data.AppendingGV.OldGV, E.AppendingGVIsOldCtorDtor, ArrayRef(NewInits)); break; } @@ -1094,15 +1094,21 @@ void Mapper::remapFunction(Function &F) { } } -void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, +void Mapper::mapAppendingVariable(GlobalVariable &GV, GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers) { + Constant *InitPrefix = + (OldGV && !OldGV->isDeclaration()) ? 
OldGV->getInitializer() : nullptr; + SmallVector Elements; if (InitPrefix) { unsigned NumElements = cast(InitPrefix->getType())->getNumElements(); for (unsigned I = 0; I != NumElements; ++I) Elements.push_back(InitPrefix->getAggregateElement(I)); + OldGV->setInitializer(nullptr); + if (InitPrefix->hasUseList() && InitPrefix->use_empty()) + InitPrefix->destroyConstant(); } PointerType *VoidPtrTy; @@ -1148,7 +1154,7 @@ void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, } void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, + GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID) { @@ -1159,7 +1165,7 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, WE.Kind = WorklistEntry::MapAppendingVar; WE.MCID = MCID; WE.Data.AppendingGV.GV = &GV; - WE.Data.AppendingGV.InitPrefix = InitPrefix; + WE.Data.AppendingGV.OldGV = OldGV; WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor; WE.AppendingGVNumNewMembers = NewMembers.size(); Worklist.push_back(WE); @@ -1282,12 +1288,12 @@ void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV, } void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, + GlobalVariable *OldGV, bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID) { getAsMapper(pImpl)->scheduleMapAppendingVariable( - GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID); + GV, OldGV, IsOldCtorDtor, NewMembers, MCID); } void ValueMapper::scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee, From c5aace42360184a75323d8fab3a26f0a1d8ba5e7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 19:50:33 -0800 Subject: [PATCH 26/32] DAG: Move expandMultipleResultFPLibCall to TargetLowering (NFC) (#166988) This kind of helper is higher level and not general enough to go directly in SelectionDAG. Most similar utilities are in TargetLowering. --- llvm/include/llvm/CodeGen/SelectionDAG.h | 10 -- llvm/include/llvm/CodeGen/TargetLowering.h | 10 ++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 2 +- .../SelectionDAG/LegalizeVectorOps.cpp | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 161 ------------------ .../CodeGen/SelectionDAG/TargetLowering.cpp | 161 ++++++++++++++++++ 7 files changed, 176 insertions(+), 176 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 5b331e4444915..b024e8a68bd6e 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1718,16 +1718,6 @@ class SelectionDAG { /// the target's desired shift amount type. LLVM_ABI SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); - /// Expands a node with multiple results to an FP or vector libcall. The - /// libcall is expected to take all the operands of the \p Node followed by - /// output pointers for each of the results. \p CallRetResNo can be optionally - /// set to indicate that one of the results comes from the libcall's return - /// value. - LLVM_ABI bool - expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node, - SmallVectorImpl &Results, - std::optional CallRetResNo = {}); - /// Expand the specified \c ISD::VAARG node as the Legalize pass would. 
LLVM_ABI SDValue expandVAArg(SDNode *Node); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 4d5d1fc7dfadc..cec7d09f494d6 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5757,6 +5757,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase { /// consisting of zext/sext, extract_subvector, mul and add operations. SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const; + /// Expands a node with multiple results to an FP or vector libcall. The + /// libcall is expected to take all the operands of the \p Node followed by + /// output pointers for each of the results. \p CallRetResNo can be optionally + /// set to indicate that one of the results comes from the libcall's return + /// value. + bool expandMultipleResultFPLibCall( + SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, + SmallVectorImpl &Results, + std::optional CallRetResNo = {}) const; + /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC /// on the current target. A VP_SETCC will additionally be given a Mask /// and/or EVL not equal to SDValue(). diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3ed84af6a8717..99d14a60c6ed1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4842,7 +4842,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results); if (!Expanded) { DAG.getContext()->emitError(Twine("no libcall available for ") + Node->getOperationName(&DAG)); @@ -4940,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 58983cb57d7f6..383a025a4d916 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1726,7 +1726,7 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); + TLI.expandMultipleResultFPLibCall(DAG, LC, N, Results, CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index c55e55df373e9..7d979caa8bf82 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1275,7 +1275,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { ? 
RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results)) + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results)) return; // TODO: Try to see if there's a narrower call available to use before @@ -1286,7 +1286,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results, + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f05266967fb68..363c71d84694f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2467,167 +2467,6 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { return getZExtOrTrunc(Op, SDLoc(Op), ShTy); } -/// Given a store node \p StoreNode, return true if it is safe to fold that node -/// into \p FPNode, which expands to a library call with output pointers. -static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, - SDNode *FPNode) { - SmallVector Worklist; - SmallVector DeferredNodes; - SmallPtrSet Visited; - - // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). - for (SDValue Op : StoreNode->ops()) - if (Op.getNode() != FPNode) - Worklist.push_back(Op.getNode()); - - unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); - while (!Worklist.empty()) { - const SDNode *Node = Worklist.pop_back_val(); - auto [_, Inserted] = Visited.insert(Node); - if (!Inserted) - continue; - - if (MaxSteps > 0 && Visited.size() >= MaxSteps) - return false; - - // Reached the FPNode (would result in a cycle). - // OR Reached CALLSEQ_START (would result in nested call sequences). - if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) - return false; - - if (Node->getOpcode() == ISD::CALLSEQ_END) { - // Defer looking into call sequences (so we can check we're outside one). - // We still need to look through these for the predecessor check. - DeferredNodes.push_back(Node); - continue; - } - - for (SDValue Op : Node->ops()) - Worklist.push_back(Op.getNode()); - } - - // True if we're outside a call sequence and don't have the FPNode as a - // predecessor. No cycles or nested call sequences possible. - return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, - MaxSteps); -} - -bool SelectionDAG::expandMultipleResultFPLibCall( - RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl &Results, - std::optional CallRetResNo) { - if (LC == RTLIB::UNKNOWN_LIBCALL) - return false; - - RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LC); - if (LibcallImpl == RTLIB::Unsupported) - return false; - - LLVMContext &Ctx = *getContext(); - EVT VT = Node->getValueType(0); - unsigned NumResults = Node->getNumValues(); - - // Find users of the node that store the results (and share input chains). The - // destination pointers can be used instead of creating stack allocations. - SDValue StoresInChain; - SmallVector ResultStores(NumResults); - for (SDNode *User : Node->users()) { - if (!ISD::isNormalStore(User)) - continue; - auto *ST = cast(User); - SDValue StoreValue = ST->getValue(); - unsigned ResNo = StoreValue.getResNo(); - // Ensure the store corresponds to an output pointer. 
- if (CallRetResNo == ResNo) - continue; - // Ensure the store to the default address space and not atomic or volatile. - if (!ST->isSimple() || ST->getAddressSpace() != 0) - continue; - // Ensure all store chains are the same (so they don't alias). - if (StoresInChain && ST->getChain() != StoresInChain) - continue; - // Ensure the store is properly aligned. - Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); - if (ST->getAlign() < - getDataLayout().getABITypeAlign(StoreType->getScalarType())) - continue; - // Avoid: - // 1. Creating cyclic dependencies. - // 2. Expanding the node to a call within a call sequence. - if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) - continue; - ResultStores[ResNo] = ST; - StoresInChain = ST->getChain(); - } - - TargetLowering::ArgListTy Args; - - // Pass the arguments. - for (const SDValue &Op : Node->op_values()) { - EVT ArgVT = Op.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(Ctx); - Args.emplace_back(Op, ArgTy); - } - - // Pass the output pointers. - SmallVector ResultPtrs(NumResults); - Type *PointerTy = PointerType::getUnqual(Ctx); - for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { - if (ResNo == CallRetResNo) - continue; - EVT ResVT = Node->getValueType(ResNo); - SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT); - ResultPtrs[ResNo] = ResultPtr; - Args.emplace_back(ResultPtr, PointerTy); - } - - SDLoc DL(Node); - - if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { - // Pass the vector mask (if required). - EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); - SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); - Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); - } - - Type *RetType = CallRetResNo.has_value() - ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) - : Type::getVoidTy(Ctx); - SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); - SDValue Callee = - getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), - TLI->getPointerTy(getDataLayout())); - TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( - TLI->getLibcallImplCallingConv(LibcallImpl), RetType, Callee, - std::move(Args)); - - auto [Call, CallChain] = TLI->LowerCallTo(CLI); - - for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { - if (ResNo == CallRetResNo) { - Results.push_back(Call); - continue; - } - MachinePointerInfo PtrInfo; - SDValue LoadResult = - getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); - SDValue OutChain = LoadResult.getValue(1); - - if (StoreSDNode *ST = ResultStores[ResNo]) { - // Replace store with the library call. 
- ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); - PtrInfo = ST->getPointerInfo(); - } else { - PtrInfo = MachinePointerInfo::getFixedStack( - getMachineFunction(), cast(ResultPtr)->getIndex()); - } - - Results.push_back(LoadResult); - } - - return true; -} - SDValue SelectionDAG::expandVAArg(SDNode *Node) { SDLoc dl(Node); const TargetLowering &TLI = getTargetLoweringInfo(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b51d6649af2ec..bb64f4ee70280 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12126,6 +12126,167 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N, return Subvectors[0]; } +/// Given a store node \p StoreNode, return true if it is safe to fold that node +/// into \p FPNode, which expands to a library call with output pointers. +static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, + SDNode *FPNode) { + SmallVector Worklist; + SmallVector DeferredNodes; + SmallPtrSet Visited; + + // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). + for (SDValue Op : StoreNode->ops()) + if (Op.getNode() != FPNode) + Worklist.push_back(Op.getNode()); + + unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); + while (!Worklist.empty()) { + const SDNode *Node = Worklist.pop_back_val(); + auto [_, Inserted] = Visited.insert(Node); + if (!Inserted) + continue; + + if (MaxSteps > 0 && Visited.size() >= MaxSteps) + return false; + + // Reached the FPNode (would result in a cycle). + // OR Reached CALLSEQ_START (would result in nested call sequences). + if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) + return false; + + if (Node->getOpcode() == ISD::CALLSEQ_END) { + // Defer looking into call sequences (so we can check we're outside one). + // We still need to look through these for the predecessor check. + DeferredNodes.push_back(Node); + continue; + } + + for (SDValue Op : Node->ops()) + Worklist.push_back(Op.getNode()); + } + + // True if we're outside a call sequence and don't have the FPNode as a + // predecessor. No cycles or nested call sequences possible. + return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, + MaxSteps); +} + +bool TargetLowering::expandMultipleResultFPLibCall( + SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, + SmallVectorImpl &Results, + std::optional CallRetResNo) const { + if (LC == RTLIB::UNKNOWN_LIBCALL) + return false; + + RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC); + if (LibcallImpl == RTLIB::Unsupported) + return false; + + LLVMContext &Ctx = *DAG.getContext(); + EVT VT = Node->getValueType(0); + unsigned NumResults = Node->getNumValues(); + + // Find users of the node that store the results (and share input chains). The + // destination pointers can be used instead of creating stack allocations. + SDValue StoresInChain; + SmallVector ResultStores(NumResults); + for (SDNode *User : Node->users()) { + if (!ISD::isNormalStore(User)) + continue; + auto *ST = cast(User); + SDValue StoreValue = ST->getValue(); + unsigned ResNo = StoreValue.getResNo(); + // Ensure the store corresponds to an output pointer. + if (CallRetResNo == ResNo) + continue; + // Ensure the store to the default address space and not atomic or volatile. + if (!ST->isSimple() || ST->getAddressSpace() != 0) + continue; + // Ensure all store chains are the same (so they don't alias). 
+ if (StoresInChain && ST->getChain() != StoresInChain) + continue; + // Ensure the store is properly aligned. + Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); + if (ST->getAlign() < + DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType())) + continue; + // Avoid: + // 1. Creating cyclic dependencies. + // 2. Expanding the node to a call within a call sequence. + if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) + continue; + ResultStores[ResNo] = ST; + StoresInChain = ST->getChain(); + } + + ArgListTy Args; + + // Pass the arguments. + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(Ctx); + Args.emplace_back(Op, ArgTy); + } + + // Pass the output pointers. + SmallVector ResultPtrs(NumResults); + Type *PointerTy = PointerType::getUnqual(Ctx); + for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { + if (ResNo == CallRetResNo) + continue; + EVT ResVT = Node->getValueType(ResNo); + SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT); + ResultPtrs[ResNo] = ResultPtr; + Args.emplace_back(ResultPtr, PointerTy); + } + + SDLoc DL(Node); + + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + // Pass the vector mask (if required). + EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT); + SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT); + Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); + } + + Type *RetType = CallRetResNo.has_value() + ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) + : Type::getVoidTy(Ctx); + SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode(); + SDValue Callee = DAG.getExternalSymbol(getLibcallImplName(LibcallImpl).data(), + getPointerTy(DAG.getDataLayout())); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( + getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args)); + + auto [Call, CallChain] = LowerCallTo(CLI); + + for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { + if (ResNo == CallRetResNo) { + Results.push_back(Call); + continue; + } + MachinePointerInfo PtrInfo; + SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain, + ResultPtr, PtrInfo); + SDValue OutChain = LoadResult.getValue(1); + + if (StoreSDNode *ST = ResultStores[ResNo]) { + // Replace store with the library call. + DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); + PtrInfo = ST->getPointerInfo(); + } else { + PtrInfo = MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), + cast(ResultPtr)->getIndex()); + } + + Results.push_back(LoadResult); + } + + return true; +} + bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, From 54659793328a0a7f0f1efa9d4d7fc43999b34ae2 Mon Sep 17 00:00:00 2001 From: Gleb Popov <6yearold@gmail.com> Date: Wed, 12 Nov 2025 07:45:42 +0300 Subject: [PATCH 27/32] libunwind: Implement the unw_strerror function for better nongnu libunwind compatibility (#160887) As it was explained to me in https://discourse.llvm.org/t/libunwinds-raison-detre/88283/2 the LLVM version of libunwind is mostly compatible with nongnu one. This change improves the compatibility a bit further. 
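For reference, a minimal caller-side sketch of the nongnu-style error-reporting idiom this enables (my own illustration, not part of this patch; the `show_top_frame` helper and its output format are made up, and the `UNW_LOCAL_ONLY` define just follows the nongnu convention):

```c
#define UNW_LOCAL_ONLY /* nongnu convention for local-only unwinding */
#include <libunwind.h>
#include <stdio.h>

/* Inspect only the current frame: capture the context, initialize a local
   cursor, read the instruction pointer, and report any failure through
   unw_strerror(). */
static void show_top_frame(void) {
  unw_context_t uc;
  unw_cursor_t cursor;
  unw_word_t ip;
  int err;

  unw_getcontext(&uc);
  if ((err = unw_init_local(&cursor, &uc)) != UNW_ESUCCESS) {
    fprintf(stderr, "unw_init_local: %s\n", unw_strerror(err));
    return;
  }
  if ((err = unw_get_reg(&cursor, UNW_REG_IP, &ip)) != UNW_ESUCCESS) {
    fprintf(stderr, "unw_get_reg: %s\n", unw_strerror(err));
    return;
  }
  printf("ip = %#lx\n", (unsigned long)ip);
}
```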
--- libunwind/include/libunwind.h | 1 + libunwind/src/libunwind.cpp | 35 +++++++++++++++++++++++++++++++++++ libunwind/src/libunwind_ext.h | 1 + 3 files changed, 37 insertions(+) diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h index 18684ce311f95..56ca7110274a3 100644 --- a/libunwind/include/libunwind.h +++ b/libunwind/include/libunwind.h @@ -234,6 +234,7 @@ extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL; extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL; //extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*); +extern const char *unw_strerror(int) LIBUNWIND_AVAIL; extern unw_addr_space_t unw_local_addr_space; diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 3a94b6cf0cc5c..b3036396c379d 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -409,6 +409,41 @@ void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start) { } #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +/// Maps the UNW_* error code to a textual representation +_LIBUNWIND_HIDDEN const char *__unw_strerror(int error_code) { + switch (error_code) { + case UNW_ESUCCESS: + return "no error"; + case UNW_EUNSPEC: + return "unspecified (general) error"; + case UNW_ENOMEM: + return "out of memory"; + case UNW_EBADREG: + return "bad register number"; + case UNW_EREADONLYREG: + return "attempt to write read-only register"; + case UNW_ESTOPUNWIND: + return "stop unwinding"; + case UNW_EINVALIDIP: + return "invalid IP"; + case UNW_EBADFRAME: + return "bad frame"; + case UNW_EINVAL: + return "unsupported operation or bad value"; + case UNW_EBADVERSION: + return "unwind info has unsupported version"; + case UNW_ENOINFO: + return "no unwind info found"; +#if defined(_LIBUNWIND_TARGET_AARCH64) && !defined(_LIBUNWIND_IS_NATIVE_ONLY) + case UNW_ECROSSRASIGNING: + return "cross unwind with return address signing"; +#endif + } + return "invalid error code"; +} +_LIBUNWIND_WEAK_ALIAS(__unw_strerror, unw_strerror) + #endif // !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) #ifdef __APPLE__ diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h index 900e8101f81f1..f5da90d7bd3b7 100644 --- a/libunwind/src/libunwind_ext.h +++ b/libunwind/src/libunwind_ext.h @@ -46,6 +46,7 @@ extern int __unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *); extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t); extern int __unw_is_signal_frame(unw_cursor_t *); extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); +extern const char *__unw_strerror(int); #if defined(_AIX) extern uintptr_t __unw_get_data_rel_base(unw_cursor_t *); From ce32b73a6285bd5db22e64f52c132d1fc90aed39 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 12 Nov 2025 16:10:34 +1100 Subject: [PATCH 28/32] Orc rt session wrap unwrap (#167635) --- orc-rt/include/orc-rt/Session.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/orc-rt/include/orc-rt/Session.h b/orc-rt/include/orc-rt/Session.h index c198d374bc849..fe4f07157385f 100644 --- a/orc-rt/include/orc-rt/Session.h +++ b/orc-rt/include/orc-rt/Session.h @@ -17,6 +17,8 @@ #include "orc-rt/ResourceManager.h" #include "orc-rt/move_only_function.h" +#include "orc-rt-c/CoreTypes.h" + #include namespace orc_rt { @@ -69,6 +71,14 @@ class Session { std::vector> ResourceMgrs; }; +inline orc_rt_SessionRef wrap(Session *S) noexcept { + return 
reinterpret_cast(S); +} + +inline Session *unwrap(orc_rt_SessionRef S) noexcept { + return reinterpret_cast(S); +} + } // namespace orc_rt #endif // ORC_RT_SESSION_H From a664cadaf4259da1cd1a3c6eb3cd38520d03ffa0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 21:18:42 -0800 Subject: [PATCH 29/32] Wasm: Fix hardcoding _Unwind_CallPersonality function name (#167612) --- llvm/lib/CodeGen/WasmEHPrepare.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index 1ea3e6bcb15ce..2f54578da5113 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -85,6 +85,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Module.h" +#include "llvm/IR/RuntimeLibcalls.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -273,8 +274,13 @@ bool WasmEHPrepareImpl::prepareEHPads(Function &F) { // instruction selection. CatchF = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::wasm_catch); + // FIXME: Verify this is really supported for current module. + StringRef UnwindCallPersonalityName = + RTLIB::RuntimeLibcallsInfo::getLibcallImplName( + RTLIB::impl__Unwind_CallPersonality); + // _Unwind_CallPersonality() wrapper function, which calls the personality - CallPersonalityF = M.getOrInsertFunction("_Unwind_CallPersonality", + CallPersonalityF = M.getOrInsertFunction(UnwindCallPersonalityName, IRB.getInt32Ty(), IRB.getPtrTy()); if (Function *F = dyn_cast(CallPersonalityF.getCallee())) F->setDoesNotThrow(); From 7d9b7e8c7b251d54b5d9d3f9fb8d5dde3483389c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Nov 2025 21:26:40 -0800 Subject: [PATCH 30/32] PPC: Mark xfailed sincospi test as unsupported with EXPENSIVE_CHECKS (#167639) --- llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll index 4fbb6a07aa37d..c332f441e8b00 100644 --- a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll +++ b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll @@ -1,4 +1,5 @@ ; XFAIL: * +; UNSUPPORTED: expensive_checks ; FIXME: asserts ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-gnu-linux -filetype=null -enable-legalize-types-checking=0 \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names %s From a7ceeffb30b1b785ab5d5f86ff12e79cd7cb9df6 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 12 Nov 2025 16:36:52 +1100 Subject: [PATCH 31/32] [orc-rt] Make Session explicitly immovable. (#167640) NFCI -- the deleted copy constructor already made this immovable. The explicit operations just make clear that this was intentional. --- orc-rt/include/orc-rt/Session.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/orc-rt/include/orc-rt/Session.h b/orc-rt/include/orc-rt/Session.h index fe4f07157385f..fbace053bd72f 100644 --- a/orc-rt/include/orc-rt/Session.h +++ b/orc-rt/include/orc-rt/Session.h @@ -42,6 +42,8 @@ class Session { // Sessions are not copyable or moveable. 
Session(const Session &) = delete; Session &operator=(const Session &) = delete; + Session(Session &&) = delete; + Session &operator=(Session &&) = delete; ~Session(); From ae2b303391c7d626475a1b25db91c1a8ffefbc5d Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 12 Nov 2025 13:37:36 +0800 Subject: [PATCH 32/32] [C++20] [Modules] Don't import initializer/pending implicit instantiations from other named module (#167468) Close https://github.com/llvm/llvm-project/issues/166068 The cause of the problem is that we would import initializers and pending implicit instantiations from other named modules. This wastes compilation time, but it went unnoticed for a long time: the weak symbols can coexist and the redundant strong symbols are removed by other mechanisms, so the output still looked correct even though compilation time was being wasted. --- clang/lib/Serialization/ASTReader.cpp | 23 ++++++++++------ clang/lib/Serialization/ASTWriter.cpp | 22 +++++++++------- clang/test/Modules/pr166068.cppm | 38 +++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 clang/test/Modules/pr166068.cppm diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a04041c10b4ba..634bf991b2aee 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4087,10 +4087,14 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, std::errc::illegal_byte_sequence, "Invalid PENDING_IMPLICIT_INSTANTIATIONS block"); - for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) { - PendingInstantiations.push_back( - {ReadDeclID(F, Record, I), - ReadSourceLocation(F, Record, I).getRawEncoding()}); + // For standard C++20 modules, we only read the instantiations + // if this is the main file. + if (!F.StandardCXXModule || F.Kind == MK_MainFile) { + for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) { + PendingInstantiations.push_back( + {ReadDeclID(F, Record, I), + ReadSourceLocation(F, Record, I).getRawEncoding()}); + } } break; @@ -6438,10 +6442,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, case SUBMODULE_INITIALIZERS: { if (!ContextObj) break; - SmallVector Inits; - for (unsigned I = 0; I < Record.size(); /*in loop*/) - Inits.push_back(ReadDeclID(F, Record, I)); - ContextObj->addLazyModuleInitializers(CurrentModule, Inits); + // Standard C++ modules have their own way to initialize variables. + if (!F.StandardCXXModule || F.Kind == MK_MainFile) { + SmallVector Inits; + for (unsigned I = 0; I < Record.size(); /*in loop*/) + Inits.push_back(ReadDeclID(F, Record, I)); + ContextObj->addLazyModuleInitializers(CurrentModule, Inits); + } break; } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 821e7df1bce53..e4618d60a8acb 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3247,7 +3247,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { // Emit the reachable initializers. // The initializer may only be unreachable in reduced BMI. - if (Context) { + if (Context && !GeneratingReducedBMI) { RecordData Inits; for (Decl *D : Context->getModuleInitializers(Mod)) if (wasDeclEmitted(D)) @@ -5827,17 +5827,19 @@ void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) { Stream.EmitRecord(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES, UnusedLocalTypedefNameCandidates); - // Write the record containing pending implicit instantiations.
- RecordData PendingInstantiations; - for (const auto &I : SemaRef.PendingInstantiations) { - if (!wasDeclEmitted(I.first)) - continue; + if (!GeneratingReducedBMI) { + // Write the record containing pending implicit instantiations. + RecordData PendingInstantiations; + for (const auto &I : SemaRef.PendingInstantiations) { + if (!wasDeclEmitted(I.first)) + continue; - AddDeclRef(I.first, PendingInstantiations); - AddSourceLocation(I.second, PendingInstantiations); + AddDeclRef(I.first, PendingInstantiations); + AddSourceLocation(I.second, PendingInstantiations); + } + if (!PendingInstantiations.empty()) + Stream.EmitRecord(PENDING_IMPLICIT_INSTANTIATIONS, PendingInstantiations); } - if (!PendingInstantiations.empty()) - Stream.EmitRecord(PENDING_IMPLICIT_INSTANTIATIONS, PendingInstantiations); // Write the record containing declaration references of Sema. RecordData SemaDeclRefs; diff --git a/clang/test/Modules/pr166068.cppm b/clang/test/Modules/pr166068.cppm new file mode 100644 index 0000000000000..b6944b591d264 --- /dev/null +++ b/clang/test/Modules/pr166068.cppm @@ -0,0 +1,38 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/flyweight.cppm -emit-reduced-module-interface -o %t/flyweight.pcm +// RUN: %clang_cc1 -std=c++20 %t/account.cppm -emit-reduced-module-interface -o %t/account.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/core.cppm -emit-reduced-module-interface -o %t/core.pcm -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/core.cppm -fprebuilt-module-path=%t -emit-llvm -disable-llvm-passes -o - | FileCheck %t/core.cppm + +//--- flyweight.cppm +module; +template struct flyweight_core { + static bool init() { (void)__builtin_operator_new(2); return true; } + static bool static_initializer; +}; +template bool flyweight_core::static_initializer = init(); +export module flyweight; +export template void flyweight() { + (void)flyweight_core::static_initializer; +} + +//--- account.cppm +export module account; +import flyweight; +export void account() { + (void)::flyweight; +} + +//--- core.cppm +export module core; +import account; + +extern "C" void core() {} + +// Fine enough to check it won't crash. +// CHECK-NOT: init +// CHECK-NOT: static_initializer +// CHECK: define {{.*}}@core(