+// MUSTACHE-GLOBAL-INDEX:

Global Namespace

+// MUSTACHE-GLOBAL-INDEX:

Namespaces

+// MUSTACHE-GLOBAL-INDEX:
  • @nonymous_namespace
  • +// MUSTACHE-GLOBAL-INDEX:
  • AnotherNamespace
  • +// MUSTACHE-GLOBAL-INDEX:
  • PrimaryNamespace
  • + // MD-GLOBAL-INDEX: # Global Namespace // MD-GLOBAL-INDEX: ## Namespaces // MD-GLOBAL-INDEX: * [@nonymous_namespace](..{{[\/]}}@nonymous_namespace{{[\/]}}index.md) From 37f7b3128d8217e6a99cc6117ea709e8fa7b0704 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 24 Nov 2025 18:11:58 +0000 Subject: [PATCH 23/37] Reland [VPlan] Handle WidenGEP in narrowToSingleScalars (#167880) Changes: Fix a missed update to WidenGEP::usesFirstLaneOnly, and include reduced-case test that was previously hitting the new assert: the underlying reason was that VPWidenGEP::usesScalars was too weak, and the single-scalar WidenGEP was not narrowed by narrowToSingleScalarRecipes. This allows us to strip a special case in VPWidenGEP::execute. --- llvm/lib/Transforms/Vectorize/VPlan.h | 15 +--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 76 ++++++++----------- .../Transforms/Vectorize/VPlanTransforms.cpp | 3 +- .../RISCV/gather-scatter-cost.ll | 14 ++-- ...row-to-single-scalar-widen-gep-scalable.ll | 60 +++++++++++++++ .../widen-gep-all-indices-invariant.ll | 12 +-- 6 files changed, 107 insertions(+), 73 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8a435accfedfe..0c7d9c0193a03 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1854,12 +1854,6 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { return getOperand(I + 1)->isDefinedOutsideLoopRegions(); } - bool areAllOperandsInvariant() const { - return all_of(operands(), [](VPValue *Op) { - return Op->isDefinedOutsideLoopRegions(); - }); - } - public: VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands, const VPIRFlags &Flags = {}, @@ -1898,14 +1892,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool usesFirstLaneOnly(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - if (Op == getOperand(0)) - return isPointerLoopInvariant(); - else - return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions(); - } + bool usesFirstLaneOnly(const VPValue *Op) const override; protected: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5ea9dd349e06f..54fdec3bcf4a1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2536,6 +2536,11 @@ void VPScalarIVStepsRecipe::printRecipe(raw_ostream &O, const Twine &Indent, } #endif +bool VPWidenGEPRecipe::usesFirstLaneOnly(const VPValue *Op) const { + assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); + return vputils::isSingleScalar(Op); +} + void VPWidenGEPRecipe::execute(VPTransformState &State) { assert(State.VF.isVector() && "not widening"); // Construct a vector GEP by widening the operands of the scalar GEP as @@ -2544,51 +2549,32 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. - if (areAllOperandsInvariant()) { - // If we are vectorizing, but the GEP has only loop-invariant operands, - // the GEP we build (by only using vector-typed operands for - // loop-varying values) would be a scalar pointer. Thus, to ensure we - // produce a vector of pointers, we need to either arbitrarily pick an - // operand to broadcast, or broadcast a clone of the original GEP. - // Here, we broadcast a clone of the original. - // - // TODO: If at some point we decide to scalarize instructions having - // loop-invariant operands, this special case will no longer be - // required. We would add the scalarization decision to - // collectLoopScalars() and teach getVectorValue() to broadcast - // the lane-zero scalar value. - SmallVector Ops; - for (unsigned I = 0, E = getNumOperands(); I != E; I++) - Ops.push_back(State.get(getOperand(I), VPLane(0))); - - auto *NewGEP = - State.Builder.CreateGEP(getSourceElementType(), Ops[0], drop_begin(Ops), - "", getGEPNoWrapFlags()); - Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP); - State.set(this, Splat); - } else { - // If the GEP has at least one loop-varying operand, we are sure to - // produce a vector of pointers unless VF is scalar. - // The pointer operand of the new GEP. If it's loop-invariant, we - // won't broadcast it. - auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant()); - - // Collect all the indices for the new GEP. If any index is - // loop-invariant, we won't broadcast it. - SmallVector Indices; - for (unsigned I = 1, E = getNumOperands(); I < E; I++) { - VPValue *Operand = getOperand(I); - Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1))); - } - - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, - // but it should be a vector, otherwise. - auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices, - "", getGEPNoWrapFlags()); - assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && - "NewGEP is not a pointer vector"); - State.set(this, NewGEP); - } + assert( + any_of(operands(), + [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); }) && + "Expected at least one loop-variant operand"); + + // If the GEP has at least one loop-varying operand, we are sure to + // produce a vector of pointers unless VF is scalar. + // The pointer operand of the new GEP. If it's loop-invariant, we + // won't broadcast it. + auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant()); + + // Collect all the indices for the new GEP. If any index is + // loop-invariant, we won't broadcast it. + SmallVector Indices; + for (unsigned I = 1, E = getNumOperands(); I < E; I++) { + VPValue *Operand = getOperand(I); + Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1))); + } + + // Create the new GEP. Note that this GEP may be a scalar if VF == 1, + // but it should be a vector, otherwise. + auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices, + "", getGEPNoWrapFlags()); + assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && + "NewGEP is not a pointer vector"); + State.set(this, NewGEP); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e7a8773be067b..89b490e960f33 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1451,7 +1451,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { - if (!isa(&R)) + if (!isa(&R)) continue; auto *RepR = dyn_cast(&R); if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index 212a5c99676f4..877484f5159fd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -63,7 +63,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK-NEXT: store i32 [[STORE]], ptr [[NBRBOXES]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -114,7 +114,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; RVA23: middle.block: ; RVA23-NEXT: br label [[LOOP:%.*]] ; RVA23: exit: @@ -141,7 +141,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; RVA23ZVL1024B: middle.block: ; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]] ; RVA23ZVL1024B: exit: @@ -185,16 +185,16 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[TMP5]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0( zeroinitializer, align 1 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll new file mode 100644 index 0000000000000..6746e92cc1fd1 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=2 \ +; RUN: -force-target-supports-scalable-vectors=true \ +; RUN: -scalable-vectorization=preferred -S %s | FileCheck %s + +define void @widengep_narrow(ptr %in, ptr noalias %p) { +; CHECK-LABEL: define void @widengep_narrow( +; CHECK-SAME: ptr [[IN:%.*]], ptr noalias [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[IN]], i64 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, [[BROADCAST_SPLAT2]], [[VEC_IND]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement [[TMP7]], i32 [[TMP10]] +; CHECK-NEXT: store ptr [[TMP11]], ptr [[P]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.in.off = getelementptr i8, ptr %in, i64 8 + %gep.in.iv = getelementptr i32, ptr %gep.in.off, i64 %iv + store ptr %gep.in.iv, ptr %p + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1024 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index 9bb010c0431d8..90ef97609e096 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -8,14 +8,14 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8 +; CHECK-NEXT: store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -55,11 +55,11 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[N]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8 +; CHECK-NEXT: store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] From 9688f88e57f369002157758b8399a235bf6763ca Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 24 Nov 2025 18:12:26 +0000 Subject: [PATCH 24/37] [LV] Pre-commit test for #128062 (#164801) In preparation to extend the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062. --- ...28062-interleaved-accesses-narrow-group.ll | 201 ++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll diff --git a/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll b/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll new file mode 100644 index 0000000000000..00eeb69dcb0f7 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll @@ -0,0 +1,201 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-interleaved-mem-accesses -S | FileCheck %s + +define void @pr128062(ptr %dst.start, i8 %a, i16 %b) { +; CHECK-LABEL: define void @pr128062( +; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255) +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[STRIDED_VEC3]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255) +; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255) +; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255) +; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ] + %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4 + %load.dst = load i8, ptr %dst, align 1 + %dst.ext = zext i8 %load.dst to i16 + %mul.dst.0 = mul nuw i16 %dst.ext, %b + %udiv.0 = udiv i16 %mul.dst.0, 255 + %trunc.0 = trunc nuw i16 %udiv.0 to i8 + %val.0 = add i8 %a, %trunc.0 + store i8 %val.0, ptr %dst, align 1 + %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1 + %load.dst.1 = load i8, ptr %gep.dst.1, align 1 + %dst.1.ext = zext i8 %load.dst.1 to i16 + %mul.dst.1 = mul nuw i16 %dst.1.ext, %b + %udiv.1 = udiv i16 %mul.dst.1, 255 + %trunc.1 = trunc nuw i16 %udiv.1 to i8 + %val.1 = add i8 %a, %trunc.1 + store i8 %val.1, ptr %gep.dst.1, align 1 + %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2 + %load.dst.2 = load i8, ptr %gep.dst.2, align 1 + %dst.2.ext = zext i8 %load.dst.2 to i16 + %mul.dst.2 = mul nuw i16 %dst.2.ext, %b + %udiv.2 = udiv i16 %mul.dst.2, 255 + %trunc.2 = trunc nuw i16 %udiv.2 to i8 + %val.2 = add i8 %a, %trunc.2 + store i8 %val.2, ptr %gep.dst.2, align 1 + %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3 + %load.dst.3 = load i8, ptr %gep.dst.3, align 1 + %dst.3.ext = zext i8 %load.dst.3 to i16 + %mul.dst.3 = mul nuw i16 %dst.3.ext, %b + %udiv.3 = udiv i16 %mul.dst.3, 255 + %trunc.3 = trunc nuw i16 %udiv.3 to i8 + %val.3 = add i8 %a, %trunc.3 + store i8 %val.3, ptr %gep.dst.3, align 1 + %iv.next = add i64 %iv, 4 + %exit.cond = icmp eq i64 %iv.next, 256 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + +; Same as above, except one zext is replaced with an sext. +define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) { +; CHECK-LABEL: define void @opcode_mismatch( +; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255) +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255) +; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255) +; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255) +; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ] + %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4 + %load.dst = load i8, ptr %dst, align 1 + %dst.ext = zext i8 %load.dst to i16 + %mul.dst.0 = mul nuw i16 %dst.ext, %b + %udiv.0 = udiv i16 %mul.dst.0, 255 + %trunc.0 = trunc nuw i16 %udiv.0 to i8 + %val.0 = add i8 %a, %trunc.0 + store i8 %val.0, ptr %dst, align 1 + %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1 + %load.dst.1 = load i8, ptr %gep.dst.1, align 1 + %dst.1.ext = sext i8 %load.dst.1 to i16 + %mul.dst.1 = mul nuw i16 %dst.1.ext, %b + %udiv.1 = udiv i16 %mul.dst.1, 255 + %trunc.1 = trunc nuw i16 %udiv.1 to i8 + %val.1 = add i8 %a, %trunc.1 + store i8 %val.1, ptr %gep.dst.1, align 1 + %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2 + %load.dst.2 = load i8, ptr %gep.dst.2, align 1 + %dst.2.ext = zext i8 %load.dst.2 to i16 + %mul.dst.2 = mul nuw i16 %dst.2.ext, %b + %udiv.2 = udiv i16 %mul.dst.2, 255 + %trunc.2 = trunc nuw i16 %udiv.2 to i8 + %val.2 = add i8 %a, %trunc.2 + store i8 %val.2, ptr %gep.dst.2, align 1 + %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3 + %load.dst.3 = load i8, ptr %gep.dst.3, align 1 + %dst.3.ext = zext i8 %load.dst.3 to i16 + %mul.dst.3 = mul nuw i16 %dst.3.ext, %b + %udiv.3 = udiv i16 %mul.dst.3, 255 + %trunc.3 = trunc nuw i16 %udiv.3 to i8 + %val.3 = add i8 %a, %trunc.3 + store i8 %val.3, ptr %gep.dst.3, align 1 + %iv.next = add i64 %iv, 4 + %exit.cond = icmp eq i64 %iv.next, 256 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} From 621cbcde0161341494b546a1fb478cfd57d1a94f Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Mon, 24 Nov 2025 12:15:00 -0600 Subject: [PATCH 25/37] [mlir][acc] Adds attr to acc.present to identify default clause origin (#169114) The `acc.present` Op as generated by ACCImplicitData does not provide a way to differentiate between `acc.present` ops that are generated implicitly and the ones that are generated as result of an explicit `default(present)` clause in the source code. This differentiation would allow for better communication to the user on the decisions made by the compiler while managing data automatically between the host and the device. This commit adds this information as a discardable attribute on the `acc.present` op. --- flang/test/Transforms/OpenACC/acc-implicit-data.fir | 8 ++++---- mlir/include/mlir/Dialect/OpenACC/OpenACC.h | 4 ++++ mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp | 2 ++ mlir/test/Dialect/OpenACC/acc-implicit-data.mlir | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/flang/test/Transforms/OpenACC/acc-implicit-data.fir b/flang/test/Transforms/OpenACC/acc-implicit-data.fir index 7f6a57cb4d8c6..2d28c341d0d5e 100644 --- a/flang/test/Transforms/OpenACC/acc-implicit-data.fir +++ b/flang/test/Transforms/OpenACC/acc-implicit-data.fir @@ -133,7 +133,7 @@ func.func @test_fir_derivedtype_in_parallel_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "aggrvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "aggrvar"} // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "aggrvar"} // ----- @@ -147,7 +147,7 @@ func.func @test_fir_derivedtype_in_kernels_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "aggrvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "aggrvar"} // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "aggrvar"} // ----- @@ -161,7 +161,7 @@ func.func @test_fir_array_in_parallel_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "arrayvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "arrayvar"} // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "arrayvar"} // ----- @@ -175,7 +175,7 @@ func.func @test_fir_array_in_kernels_defaultpresent() { return } -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {implicit = true, name = "arrayvar"} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : !fir.ref>) -> !fir.ref> {acc.from_default, implicit = true, name = "arrayvar"} // CHECK: acc.delete accPtr(%[[PRESENT]] : !fir.ref>) {dataClause = #acc, implicit = true, name = "arrayvar"} // ----- diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index 05d2316711c8a..601fc1a594768 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -177,6 +177,10 @@ static constexpr StringLiteral getRoutineInfoAttrName() { return StringLiteral("acc.routine_info"); } +static constexpr StringLiteral getFromDefaultClauseAttrName() { + return StringLiteral("acc.from_default"); +} + static constexpr StringLiteral getVarNameAttrName() { return VarNameAttr::name; } diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp index 91262bd76ca31..7d729619b3f21 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -570,6 +570,8 @@ Operation *ACCImplicitData::generateDataClauseOpForCandidate( newDataOp = acc::PresentOp::create(builder, loc, var, /*structured=*/true, /*implicit=*/true, accSupport.getVariableName(var)); + newDataOp->setAttr(acc::getFromDefaultClauseAttrName(), + builder.getUnitAttr()); } else { auto copyinOp = acc::CopyinOp::create(builder, loc, var, diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir index cf09c33ca5197..06c1c3cadd4ba 100644 --- a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir +++ b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir @@ -110,7 +110,7 @@ func.func @test_array_parallel_defaultpresent() { } // CHECK-LABEL: func.func @test_array_parallel_defaultpresent -// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {implicit = true, name = ""} +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {acc.from_default, implicit = true, name = ""} // CHECK: acc.delete accPtr(%[[PRESENT]] : memref<10xf32>) {dataClause = #acc, implicit = true, name = ""} // ----- From a27bb38ee6f5762e715803d8eb6ffc5a8dd09575 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 24 Nov 2025 10:19:15 -0800 Subject: [PATCH 26/37] Reapply "[NFC][bugpoint] Namespace cleanup in `bugpoint`" (#168961) (#169055) This reverts commit b83e458fe5330227581e1e65f3866ddfcd597837. Also undo the use of namespace qualifier for `ReducePassList` as that seems to cause build failures. --- llvm/tools/bugpoint/BugDriver.h | 5 +++++ llvm/tools/bugpoint/ExecutionDriver.cpp | 17 +++++++---------- llvm/tools/bugpoint/ExtractFunction.cpp | 3 --- llvm/tools/bugpoint/Miscompilation.cpp | 5 ----- llvm/tools/bugpoint/OptimizerDriver.cpp | 4 ---- 5 files changed, 12 insertions(+), 22 deletions(-) diff --git a/llvm/tools/bugpoint/BugDriver.h b/llvm/tools/bugpoint/BugDriver.h index ca57405f9d770..71a5aa14bbb2e 100644 --- a/llvm/tools/bugpoint/BugDriver.h +++ b/llvm/tools/bugpoint/BugDriver.h @@ -16,6 +16,7 @@ #define LLVM_TOOLS_BUGPOINT_BUGDRIVER_H #include "llvm/IR/ValueMap.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -41,6 +42,10 @@ extern bool DisableSimplifyCFG; /// extern bool BugpointIsInterrupted; +/// Command line options used across files. +extern cl::list InputArgv; +extern cl::opt OutputPrefix; + class BugDriver { LLVMContext &Context; const char *ToolName; // argv[0] of bugpoint diff --git a/llvm/tools/bugpoint/ExecutionDriver.cpp b/llvm/tools/bugpoint/ExecutionDriver.cpp index 8c6b7fbe50c7c..96eeb35b4db70 100644 --- a/llvm/tools/bugpoint/ExecutionDriver.cpp +++ b/llvm/tools/bugpoint/ExecutionDriver.cpp @@ -13,7 +13,6 @@ #include "BugDriver.h" #include "ToolRunner.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Program.h" @@ -102,15 +101,13 @@ static cl::opt CustomExecCommand( // Anything specified after the --args option are taken as arguments to the // program being debugged. -namespace llvm { -cl::list InputArgv("args", cl::Positional, - cl::desc("..."), - cl::PositionalEatsArgs); - -cl::opt - OutputPrefix("output-prefix", cl::init("bugpoint"), - cl::desc("Prefix to use for outputs (default: 'bugpoint')")); -} // namespace llvm +cl::list llvm::InputArgv("args", cl::Positional, + cl::desc("..."), + cl::PositionalEatsArgs); + +cl::opt llvm::OutputPrefix( + "output-prefix", cl::init("bugpoint"), + cl::desc("Prefix to use for outputs (default: 'bugpoint')")); static cl::list ToolArgv("tool-args", cl::Positional, cl::desc("..."), diff --git a/llvm/tools/bugpoint/ExtractFunction.cpp b/llvm/tools/bugpoint/ExtractFunction.cpp index 3206589ff38f2..31cdd0d43f2fc 100644 --- a/llvm/tools/bugpoint/ExtractFunction.cpp +++ b/llvm/tools/bugpoint/ExtractFunction.cpp @@ -36,9 +36,6 @@ using namespace llvm; #define DEBUG_TYPE "bugpoint" bool llvm::DisableSimplifyCFG = false; -namespace llvm { -extern cl::opt OutputPrefix; -} // namespace llvm static cl::opt NoDCE("disable-dce", diff --git a/llvm/tools/bugpoint/Miscompilation.cpp b/llvm/tools/bugpoint/Miscompilation.cpp index a7f1643aecf15..dcad126d87865 100644 --- a/llvm/tools/bugpoint/Miscompilation.cpp +++ b/llvm/tools/bugpoint/Miscompilation.cpp @@ -28,11 +28,6 @@ using namespace llvm; -namespace llvm { -extern cl::opt OutputPrefix; -extern cl::list InputArgv; -} // end namespace llvm - static cl::opt DisableLoopExtraction( "disable-loop-extraction", cl::desc("Don't extract loops when searching for miscompilations"), diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp index bf2e8c0b4a910..191f87c08a0f6 100644 --- a/llvm/tools/bugpoint/OptimizerDriver.cpp +++ b/llvm/tools/bugpoint/OptimizerDriver.cpp @@ -34,10 +34,6 @@ using namespace llvm; #define DEBUG_TYPE "bugpoint" -namespace llvm { -extern cl::opt OutputPrefix; -} - static cl::opt OptCmd("opt-command", cl::init(""), cl::desc("Path to opt. (default: search path " From 1b65752d16045114ed381c95306517ff99147cda Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Mon, 24 Nov 2025 10:33:10 -0800 Subject: [PATCH 27/37] [OpenACC][CIR] Implement 'present' lowering on local-declare (#169381) Just like the last handful of patches that did copy, copyin, copyout, create, etc, this patch has the exact same behavior, except the entry op is a present, and the exit is delete. --- clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp | 10 +- clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp | 10 +- .../CIR/CodeGenOpenACC/declare-present.cpp | 199 ++++++++++++++++++ 3 files changed, 212 insertions(+), 7 deletions(-) create mode 100644 clang/test/CIR/CodeGenOpenACC/declare-present.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp index bf9ec3701e6ea..9c1aeb87c8029 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp @@ -55,8 +55,8 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { if (auto copyin = val.getDefiningOp()) { switch (copyin.getDataClause()) { default: - cgf.cgm.errorNYI(declareRange, - "OpenACC local declare clause copyin cleanup"); + llvm_unreachable( + "OpenACC local declare clause copyin unexpected data clause"); break; case mlir::acc::DataClause::acc_copy: createOutOp(cgf, copyin); @@ -68,8 +68,8 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { } else if (auto create = val.getDefiningOp()) { switch (create.getDataClause()) { default: - cgf.cgm.errorNYI(declareRange, - "OpenACC local declare clause create cleanup"); + llvm_unreachable( + "OpenACC local declare clause create unexpected data clause"); break; case mlir::acc::DataClause::acc_copyout: createOutOp(cgf, create); @@ -78,6 +78,8 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { createOutOp(cgf, create); break; } + } else if (auto create = val.getDefiningOp()) { + createOutOp(cgf, create); } else if (val.getDefiningOp()) { // Link has no exit clauses, and shouldn't be copied. continue; diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index 3e229d0d76917..a23ec93ab1d75 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -984,12 +984,16 @@ class OpenACCClauseCIREmitter final addDataOperand( var, mlir::acc::DataClause::acc_present, {}, /*structured=*/true, /*implicit=*/false); + } else if constexpr (isOneOfTypes) { + for (const Expr *var : clause.getVarList()) + addDataOperand( + var, mlir::acc::DataClause::acc_present, {}, + /*structured=*/true, + /*implicit=*/false); } else if constexpr (isCombinedType) { applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. declare remains. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitPresentClause"); } } diff --git a/clang/test/CIR/CodeGenOpenACC/declare-present.cpp b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp new file mode 100644 index 0000000000000..c17b9597adf12 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. + + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare present(ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare present(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare present(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare present(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare present(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_PRESENT]], %[[LOC_INT_PRESENT]], %[[LOC_HSE_ARR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_PRESENT]], %[[ARG_INT_PRESENT]], %[[ARG_HSE_PTR_PRESENT]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_PRESENT]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_PRESENT]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + From 740d0bd385967f6ae0171896722143d9a70b66a5 Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Mon, 24 Nov 2025 13:39:15 -0500 Subject: [PATCH 28/37] [MLIR][Python] add GetTypeID for llvm.struct_type and llvm.ptr and enable downcasting (#169383) --- mlir/include/mlir-c/Dialect/LLVM.h | 4 ++++ mlir/lib/Bindings/Python/DialectLLVM.cpp | 7 ++++--- mlir/lib/CAPI/Dialect/LLVM.cpp | 8 ++++++++ mlir/test/python/dialects/llvm.py | 6 ++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index c1ade9ed8617c..cc7f09f71d028 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -23,6 +23,8 @@ MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(LLVM, llvm); MLIR_CAPI_EXPORTED MlirType mlirLLVMPointerTypeGet(MlirContext ctx, unsigned addressSpace); +MLIR_CAPI_EXPORTED MlirTypeID mlirLLVMPointerTypeGetTypeID(void); + /// Returns `true` if the type is an LLVM dialect pointer type. MLIR_CAPI_EXPORTED bool mlirTypeIsALLVMPointerType(MlirType type); @@ -58,6 +60,8 @@ MLIR_CAPI_EXPORTED MlirType mlirLLVMFunctionTypeGetReturnType(MlirType type); /// Returns `true` if the type is an LLVM dialect struct type. MLIR_CAPI_EXPORTED bool mlirTypeIsALLVMStructType(MlirType type); +MLIR_CAPI_EXPORTED MlirTypeID mlirLLVMStructTypeGetTypeID(void); + /// Returns `true` if the type is a literal (unnamed) LLVM struct type. MLIR_CAPI_EXPORTED bool mlirLLVMStructTypeIsLiteral(MlirType type); diff --git a/mlir/lib/Bindings/Python/DialectLLVM.cpp b/mlir/lib/Bindings/Python/DialectLLVM.cpp index 870a713b8edcb..05681cecf82b3 100644 --- a/mlir/lib/Bindings/Python/DialectLLVM.cpp +++ b/mlir/lib/Bindings/Python/DialectLLVM.cpp @@ -31,8 +31,8 @@ static void populateDialectLLVMSubmodule(nanobind::module_ &m) { // StructType //===--------------------------------------------------------------------===// - auto llvmStructType = - mlir_type_subclass(m, "StructType", mlirTypeIsALLVMStructType); + auto llvmStructType = mlir_type_subclass( + m, "StructType", mlirTypeIsALLVMStructType, mlirLLVMStructTypeGetTypeID); llvmStructType .def_classmethod( @@ -137,7 +137,8 @@ static void populateDialectLLVMSubmodule(nanobind::module_ &m) { // PointerType //===--------------------------------------------------------------------===// - mlir_type_subclass(m, "PointerType", mlirTypeIsALLVMPointerType) + mlir_type_subclass(m, "PointerType", mlirTypeIsALLVMPointerType, + mlirLLVMPointerTypeGetTypeID) .def_classmethod( "get", [](const nb::object &cls, std::optional addressSpace, diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index 6636f0ea73ec9..bf231767320a5 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -27,6 +27,10 @@ MlirType mlirLLVMPointerTypeGet(MlirContext ctx, unsigned addressSpace) { return wrap(LLVMPointerType::get(unwrap(ctx), addressSpace)); } +MlirTypeID mlirLLVMPointerTypeGetTypeID() { + return wrap(LLVM::LLVMPointerType::getTypeID()); +} + bool mlirTypeIsALLVMPointerType(MlirType type) { return isa(unwrap(type)); } @@ -73,6 +77,10 @@ bool mlirTypeIsALLVMStructType(MlirType type) { return isa(unwrap(type)); } +MlirTypeID mlirLLVMStructTypeGetTypeID() { + return wrap(LLVM::LLVMStructType::getTypeID()); +} + bool mlirLLVMStructTypeIsLiteral(MlirType type) { return !cast(unwrap(type)).isIdentified(); } diff --git a/mlir/test/python/dialects/llvm.py b/mlir/test/python/dialects/llvm.py index 8ea0fddee3f7c..305ed9aba940d 100644 --- a/mlir/test/python/dialects/llvm.py +++ b/mlir/test/python/dialects/llvm.py @@ -98,6 +98,9 @@ def testStructType(): assert opaque.opaque # CHECK: !llvm.struct<"opaque", opaque> + typ = Type.parse('!llvm.struct<"zoo", (i32, i64)>') + assert isinstance(typ, llvm.StructType) + # CHECK-LABEL: testSmoke @constructAndPrintInModule @@ -120,6 +123,9 @@ def testPointerType(): # CHECK: !llvm.ptr<1> print(ptr_with_addr) + typ = Type.parse("!llvm.ptr<1>") + assert isinstance(typ, llvm.PointerType) + # CHECK-LABEL: testConstant @constructAndPrintInModule From d4cd331b7efc8cd5f15faa846697d9d61b0ff246 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 24 Nov 2025 18:39:29 +0000 Subject: [PATCH 29/37] [gn build] Port 2bdd1357c826 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 09d2f1ed92554..82fe916645635 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -1167,7 +1167,6 @@ if (current_toolchain == default_toolchain) { "__locale_dir/locale_base_api.h", "__locale_dir/locale_base_api/bsd_locale_fallbacks.h", "__locale_dir/locale_base_api/ibm.h", - "__locale_dir/locale_base_api/musl.h", "__locale_dir/locale_base_api/openbsd.h", "__locale_dir/messages.h", "__locale_dir/money.h", From 0e86510c787d68f5f87708b2efdaf92a7501b6c3 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 24 Nov 2025 18:39:30 +0000 Subject: [PATCH 30/37] [gn build] Port 3773bbe9e791 --- llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn | 2 ++ llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn index 66dbf6152472a..9b524e2ef7cd5 100644 --- a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn @@ -29,6 +29,8 @@ static_library("Driver") { sources = [ "Action.cpp", "Compilation.cpp", + "CreateASTUnitFromArgs.cpp", + "CreateInvocationFromArgs.cpp", "Distro.cpp", "Driver.cpp", "Job.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn index 4009cfc609f4a..cdf39d645bc52 100644 --- a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn @@ -28,7 +28,6 @@ static_library("Frontend") { "ChainedIncludesSource.cpp", "CompilerInstance.cpp", "CompilerInvocation.cpp", - "CreateInvocationFromCommandLine.cpp", "DependencyFile.cpp", "DependencyGraph.cpp", "DiagnosticRenderer.cpp", @@ -48,6 +47,7 @@ static_library("Frontend") { "SARIFDiagnosticPrinter.cpp", "SerializedDiagnosticPrinter.cpp", "SerializedDiagnosticReader.cpp", + "StandaloneDiagnostic.cpp", "TestModuleFileExtension.cpp", "TextDiagnostic.cpp", "TextDiagnosticBuffer.cpp", From 40fb2ca506a873b031f90dac619ccca1d6ff0de5 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 24 Nov 2025 18:39:31 +0000 Subject: [PATCH 31/37] [gn build] Port 645e0dcbff33 --- llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index ad72c0069237d..e54797e188a11 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -165,6 +165,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVVectorMaskDAGMutation.cpp", "RISCVVectorPeephole.cpp", "RISCVZacasABIFix.cpp", + "RISCVZilsdOptimizer.cpp", ] } From 445956443bdf5dcc7fb8beb7dd9e571f31551519 Mon Sep 17 00:00:00 2001 From: Henry Baba-Weiss Date: Mon, 24 Nov 2025 10:49:41 -0800 Subject: [PATCH 32/37] [clang][Sema] Handle target_clones redeclarations that omit the attribute (#169259) This patch adds a case to `CheckMultiVersionAdditionalDecl()` that detects redeclarations of `target_clones` functions which omit the attribute, and makes sure they are marked as redeclarations. It also updates the comment at the call site of `CheckMultiVersionAdditionalDecl()` to reflect this. Previously, `target_clones` multiversioned functions that omitted the attribute from subsequent declarations would cause Clang to hit an `llvm_unreachable` and crash. In the following example, the second declaration (the function definition) should inherit the `target_clones` attribute from the first declaration (the forward declaration): ``` __attribute__((target_clones("arch=atom", "default"))) void foo(void); void foo(void) { /* ... */ } ``` However, `CheckMultiVersionAdditionalDecl()` was not recognizing the function definition as a redeclaration of the forward declaration, which prevented `Sema::MergeFunctionDecl()` from automatically inheriting the attribute. A side effect of this fix is that Clang now catches redeclarations of `target_clones` functions that have conflicting types, which previously caused Clang to crash by hitting that same `llvm_unreachable`. The `bad_overload1` case in `clang/test/Sema/attr-target-clones.c` has been updated to reflect this. Fixes #165517 Fixes #129483 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaDecl.cpp | 15 +++++++++++-- clang/test/CodeGen/attr-target-clones.c | 29 +++++++++++++++++++++++++ clang/test/Sema/attr-target-clones.c | 13 +++++++++++ 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 439e47b209b2f..b12e4539dc3a6 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -491,6 +491,8 @@ Bug Fixes in This Version - Accept empty enumerations in MSVC-compatible C mode. (#GH114402) - Fix a bug leading to incorrect code generation with complex number compound assignment and bitfield values, which also caused a crash with UBsan. (#GH166798) - Fixed false-positive shadow diagnostics for lambdas in explicit object member functions. (#GH163731) +- Fix an assertion failure when a ``target_clones`` attribute is only on the + forward declaration of a multiversioned function. (#GH165517) (#GH129483) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index b8ca2a376fde8..651437a6f4c30 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11996,6 +11996,16 @@ static bool CheckMultiVersionAdditionalDecl( } } + // Redeclarations of a target_clones function may omit the attribute, in which + // case it will be inherited during declaration merging. + if (NewMVKind == MultiVersionKind::None && + OldMVKind == MultiVersionKind::TargetClones) { + NewFD->setIsMultiVersion(); + Redeclaration = true; + OldDecl = OldFD; + return false; + } + // Else, this is simply a non-redecl case. Checking the 'value' is only // necessary in the Target case, since The CPUSpecific/Dispatch cases are // handled in the attribute adding step. @@ -12119,8 +12129,9 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD, } // At this point, we have a multiversion function decl (in OldFD) AND an - // appropriate attribute in the current function decl. Resolve that these are - // still compatible with previous declarations. + // appropriate attribute in the current function decl (unless it's allowed to + // omit the attribute). Resolve that these are still compatible with previous + // declarations. return CheckMultiVersionAdditionalDecl(S, OldFD, NewFD, NewCPUDisp, NewCPUSpec, NewClones, Redeclaration, OldDecl, Previous); diff --git a/clang/test/CodeGen/attr-target-clones.c b/clang/test/CodeGen/attr-target-clones.c index 295b25d6478eb..56db77c2b09a3 100644 --- a/clang/test/CodeGen/attr-target-clones.c +++ b/clang/test/CodeGen/attr-target-clones.c @@ -125,6 +125,35 @@ void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {} // WINDOWS: musttail call void @unused.arch_ivybridge.0 // WINDOWS: musttail call void @unused.default.1 +int __attribute__((target_clones("sse4.2, default"))) inherited(void); +int inherited(void) { return 0; } +// LINUX: define {{.*}}i32 @inherited.sse4.2.0() +// LINUX: define {{.*}}i32 @inherited.default.1() +// LINUX: define weak_odr ptr @inherited.resolver() #[[ATTR_RESOLVER]] comdat +// LINUX: ret ptr @inherited.sse4.2.0 +// LINUX: ret ptr @inherited.default.1 + +// DARWIN: define {{.*}}i32 @inherited.sse4.2.0() +// DARWIN: define {{.*}}i32 @inherited.default.1() +// DARWIN: define weak_odr ptr @inherited.resolver() #[[ATTR_RESOLVER]] { +// DARWIN: ret ptr @inherited.sse4.2.0 +// DARWIN: ret ptr @inherited.default.1 + +// WINDOWS: define dso_local i32 @inherited.sse4.2.0() +// WINDOWS: define dso_local i32 @inherited.default.1() +// WINDOWS: define weak_odr dso_local i32 @inherited() #[[ATTR_RESOLVER]] comdat +// WINDOWS: musttail call i32 @inherited.sse4.2.0 +// WINDOWS: musttail call i32 @inherited.default.1 + +int test_inherited(void) { + // LINUX: define {{.*}}i32 @test_inherited() #[[DEF:[0-9]+]] + // DARWIN: define {{.*}}i32 @test_inherited() #[[DEF:[0-9]+]] + // WINDOWS: define dso_local i32 @test_inherited() #[[DEF:[0-9]+]] + return inherited(); + // LINUX: call i32 @inherited() + // DARWIN: call i32 @inherited() + // WINDOWS: call i32 @inherited() +} inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2"))) foo_inline(void) { return 0; } diff --git a/clang/test/Sema/attr-target-clones.c b/clang/test/Sema/attr-target-clones.c index 4597ea54d02bf..40688772eeb96 100644 --- a/clang/test/Sema/attr-target-clones.c +++ b/clang/test/Sema/attr-target-clones.c @@ -28,6 +28,17 @@ int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(voi int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default"))) redecl4(void) { return 1; } +int __attribute__((target_clones("sse4.2", "default"))) redecl5(void); +int redecl5(void) { return 1; } + +int redecl6(void); +int __attribute__((target_clones("sse4.2", "default"))) redecl6(void) { return 1; } + +int __attribute__((target_clones("sse4.2", "default"))) redecl7(void); +// expected-error@+2 {{multiversioning attributes cannot be combined}} +// expected-note@-2 {{previous declaration is here}} +int __attribute__((target("sse4.2"))) redecl7(void) { return 1; } + int __attribute__((target("sse4.2"))) redef2(void) { return 1; } // expected-error@+2 {{multiversioning attributes cannot be combined}} // expected-note@-2 {{previous declaration is here}} @@ -87,6 +98,8 @@ int useage(void) { int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; } void bad_overload1(void) __attribute__((target_clones("mmx", "sse4.2", "default"))); +// expected-error@+2 {{conflicting types for 'bad_overload1'}} +// expected-note@-2 {{previous declaration is here}} void bad_overload1(int p) {} void bad_overload2(int p) {} From f5e228b32ac0a59b5aa834caa80150ba877e82ce Mon Sep 17 00:00:00 2001 From: "Deric C." Date: Mon, 24 Nov 2025 10:56:20 -0800 Subject: [PATCH 33/37] [DirectX] Simplify DXIL data scalarization, and data scalarize whole GEP chains (#168096) - The DXIL data scalarizer only needs to change vectors into arrays. It does not need to change the types of GEPs to match the pointer type. This PR simplifies the `visitGetElementPtrInst` method to do just that while also accounting for nested GEPs from ConstantExprs. (Before this PR, there were still vector types lingering in nested GEPs with ConstantExprs.) - The `equivalentArrayTypeFromVector` function was awkwardly placed near the top of the file and away from the other helper functions. The function is now moved next to the other helper functions. - Removed an unnecessary `||` condition from `isVectorOrArrayOfVectors` Related tests have also been cleaned up, and the test CHECKs have been modified to account for the new simplified behavior. --- .../Target/DirectX/DXILDataScalarization.cpp | 131 +++++++----------- .../bugfix_150050_data_scalarize_const_gep.ll | 35 +++-- llvm/test/CodeGen/DirectX/scalarize-alloca.ll | 6 +- llvm/test/CodeGen/DirectX/scalarize-global.ll | 6 +- 4 files changed, 75 insertions(+), 103 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index 9f1616f6960fe..5f18c37ef1125 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -29,20 +29,6 @@ static const int MaxVecSize = 4; using namespace llvm; -// Recursively creates an array-like version of a given vector type. -static Type *equivalentArrayTypeFromVector(Type *T) { - if (auto *VecTy = dyn_cast(T)) - return ArrayType::get(VecTy->getElementType(), - dyn_cast(VecTy)->getNumElements()); - if (auto *ArrayTy = dyn_cast(T)) { - Type *NewElementType = - equivalentArrayTypeFromVector(ArrayTy->getElementType()); - return ArrayType::get(NewElementType, ArrayTy->getNumElements()); - } - // If it's not a vector or array, return the original type. - return T; -} - class DXILDataScalarizationLegacy : public ModulePass { public: @@ -121,12 +107,25 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) { static bool isVectorOrArrayOfVectors(Type *T) { if (isa(T)) return true; - if (ArrayType *ArrType = dyn_cast(T)) - return isa(ArrType->getElementType()) || - isVectorOrArrayOfVectors(ArrType->getElementType()); + if (ArrayType *ArrayTy = dyn_cast(T)) + return isVectorOrArrayOfVectors(ArrayTy->getElementType()); return false; } +// Recursively creates an array-like version of a given vector type. +static Type *equivalentArrayTypeFromVector(Type *T) { + if (auto *VecTy = dyn_cast(T)) + return ArrayType::get(VecTy->getElementType(), + dyn_cast(VecTy)->getNumElements()); + if (auto *ArrayTy = dyn_cast(T)) { + Type *NewElementType = + equivalentArrayTypeFromVector(ArrayTy->getElementType()); + return ArrayType::get(NewElementType, ArrayTy->getNumElements()); + } + // If it's not a vector or array, return the original type. + return T; +} + bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) { Type *AllocatedType = AI.getAllocatedType(); if (!isVectorOrArrayOfVectors(AllocatedType)) @@ -135,7 +134,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) { IRBuilder<> Builder(&AI); Type *NewType = equivalentArrayTypeFromVector(AllocatedType); AllocaInst *ArrAlloca = - Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize"); + Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarized"); ArrAlloca->setAlignment(AI.getAlign()); AI.replaceAllUsesWith(ArrAlloca); AI.eraseFromParent(); @@ -303,78 +302,44 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { GEPOperator *GOp = cast(&GEPI); Value *PtrOperand = GOp->getPointerOperand(); - Type *NewGEPType = GOp->getSourceElementType(); - - // Unwrap GEP ConstantExprs to find the base operand and element type - while (auto *GEPCE = dyn_cast_or_null( - dyn_cast(PtrOperand))) { - GOp = GEPCE; - PtrOperand = GEPCE->getPointerOperand(); - NewGEPType = GEPCE->getSourceElementType(); - } - - Type *const OrigGEPType = NewGEPType; - Value *const OrigOperand = PtrOperand; - - if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) { - NewGEPType = NewGlobal->getValueType(); - PtrOperand = NewGlobal; - } else if (AllocaInst *Alloca = dyn_cast(PtrOperand)) { - Type *AllocatedType = Alloca->getAllocatedType(); - if (isa(AllocatedType) && - AllocatedType != GOp->getResultElementType()) - NewGEPType = AllocatedType; - } else - return false; // Only GEPs into an alloca or global variable are considered - - // Defer changing i8 GEP types until dxil-flatten-arrays - if (OrigGEPType->isIntegerTy(8)) - NewGEPType = OrigGEPType; - - // If the original type is a "sub-type" of the new type, then ensure the gep - // correctly zero-indexes the extra dimensions to keep the offset calculation - // correct. - // Eg: - // i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc. - // - // So then: - // gep [4 x i32] %idx - // -> gep [8 x [4 x i32]], i32 0, i32 %idx - // gep i32 %idx - // -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx - uint32_t MissingDims = 0; - Type *SubType = NewGEPType; - - // The new type will be in its array version; so match accordingly. - Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType); - - while (SubType != GEPArrType) { - MissingDims++; - - ArrayType *ArrType = dyn_cast(SubType); - if (!ArrType) { - assert(SubType == GEPArrType && - "GEP uses an DXIL invalid sub-type of alloca/global variable"); - break; - } - - SubType = ArrType->getElementType(); + Type *GEPType = GOp->getSourceElementType(); + + // Replace a GEP ConstantExpr pointer operand with a GEP instruction so that + // it can be visited + if (auto *PtrOpGEPCE = dyn_cast(PtrOperand); + PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) { + GetElementPtrInst *OldGEPI = + cast(PtrOpGEPCE->getAsInstruction()); + OldGEPI->insertBefore(GEPI.getIterator()); + + IRBuilder<> Builder(&GEPI); + SmallVector Indices(GEPI.indices()); + Value *NewGEP = + Builder.CreateGEP(GEPI.getSourceElementType(), OldGEPI, Indices, + GEPI.getName(), GEPI.getNoWrapFlags()); + assert(isa(NewGEP) && + "Expected newly-created GEP to be an instruction"); + GetElementPtrInst *NewGEPI = cast(NewGEP); + + GEPI.replaceAllUsesWith(NewGEPI); + GEPI.eraseFromParent(); + visitGetElementPtrInst(*OldGEPI); + visitGetElementPtrInst(*NewGEPI); + return true; } - bool NeedsTransform = OrigOperand != PtrOperand || - OrigGEPType != NewGEPType || MissingDims != 0; + Type *NewGEPType = equivalentArrayTypeFromVector(GEPType); + Value *NewPtrOperand = PtrOperand; + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) + NewPtrOperand = NewGlobal; + bool NeedsTransform = NewPtrOperand != PtrOperand || NewGEPType != GEPType; if (!NeedsTransform) return false; IRBuilder<> Builder(&GEPI); - SmallVector Indices; - - for (uint32_t I = 0; I < MissingDims; I++) - Indices.push_back(Builder.getInt32(0)); - llvm::append_range(Indices, GOp->indices()); - - Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices, + SmallVector Indices(GOp->idx_begin(), GOp->idx_end()); + Value *NewGEP = Builder.CreateGEP(NewGEPType, NewPtrOperand, Indices, GOp->getName(), GOp->getNoWrapFlags()); GOp->replaceAllUsesWith(NewGEP); diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll index 156a8e7c5c386..def886f933d08 100644 --- a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll +++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll @@ -11,9 +11,10 @@ define void @CSMain() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16 ; -; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2 -; SCHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16 -; SCHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x [4 x i32]], ptr addrspace(3) [[GEP0]], i32 0, i32 2 +; SCHECK-NEXT: [[LOAD:%.*]] = load <4 x i32>, ptr addrspace(3) [[GEP1]], align 16 +; SCHECK-NEXT: store <4 x i32> [[LOAD]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16 ; ; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1 ; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2 @@ -40,12 +41,13 @@ define void @Main() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[BFRAGPACKED_I:%.*]] = alloca i32, align 16 ; -; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1 -; SCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16 -; SCHECK-NEXT: store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) [[GEP0]], i32 0, i32 1 +; SCHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(3) [[GEP1]], align 16 +; SCHECK-NEXT: store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16 ; -; FCHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16 -; FCHECK-NEXT: store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16 +; FCHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16 +; FCHECK-NEXT: store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16 ; ; CHECK-NEXT: ret void entry: @@ -57,10 +59,12 @@ entry: define void @global_nested_geps_3d() { ; CHECK-LABEL: define void @global_nested_geps_3d() { -; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1 -; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP0]], i32 0, i32 1 +; SCHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP1]], i32 0, i32 1 +; SCHECK-NEXT: load i32, ptr [[GEP2]], align 4 ; -; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4 +; FCHECK-NEXT: load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4 ; ; CHECK-NEXT: ret void %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4 @@ -69,10 +73,13 @@ define void @global_nested_geps_3d() { define void @global_nested_geps_4d() { ; CHECK-LABEL: define void @global_nested_geps_4d() { -; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1 -; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1 +; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr [[GEP0]], i32 0, i32 1 +; SCHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP1]], i32 0, i32 1 +; SCHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP2]], i32 0, i32 1 +; SCHECK-NEXT: load i32, ptr [[GEP3]], align 4 ; -; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4 +; FCHECK-NEXT: load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4 ; ; CHECK-NEXT: ret void %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4 diff --git a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll index 475935d2eb135..85e3bb0185e44 100644 --- a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll +++ b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll @@ -48,7 +48,7 @@ define void @subtype_array_test() { ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4 ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4 ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]] @@ -64,7 +64,7 @@ define void @subtype_vector_test() { ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4 ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4 ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]] @@ -80,7 +80,7 @@ define void @subtype_scalar_test() { ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4 ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4 ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr [[alloca_val]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]] diff --git a/llvm/test/CodeGen/DirectX/scalarize-global.ll b/llvm/test/CodeGen/DirectX/scalarize-global.ll index ca10f6ece5a85..c27dc4083bfd3 100644 --- a/llvm/test/CodeGen/DirectX/scalarize-global.ll +++ b/llvm/test/CodeGen/DirectX/scalarize-global.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: subtype_array_test define <4 x i32> @subtype_array_test() { ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]] @@ -26,7 +26,7 @@ define <4 x i32> @subtype_array_test() { ; CHECK-LABEL: subtype_vector_test define <4 x i32> @subtype_vector_test() { ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]] @@ -41,7 +41,7 @@ define <4 x i32> @subtype_vector_test() { ; CHECK-LABEL: subtype_scalar_test define <4 x i32> @subtype_scalar_test() { ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0) - ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]] + ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]] ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1 ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]] ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]] From 4a0d4850d77c13b71cd0bdd40b38a5afc46fb62b Mon Sep 17 00:00:00 2001 From: Erick Velez Date: Mon, 24 Nov 2025 11:01:34 -0800 Subject: [PATCH 34/37] [clang-doc] Add definition information to class templates (#169109) --- .../clang-doc/assets/class-template.mustache | 1 + clang-tools-extra/test/clang-doc/namespace.cpp | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache b/clang-tools-extra/clang-doc/assets/class-template.mustache index c5187026a2399..bbac3ffae02fb 100644 --- a/clang-tools-extra/clang-doc/assets/class-template.mustache +++ b/clang-tools-extra/clang-doc/assets/class-template.mustache @@ -140,6 +140,7 @@

    {{TagType}} {{Name}}

    +

    Defined at line {{Location.LineNumber}} of file {{Location.Filename}}

    {{#Description}}
    {{>Comments}} diff --git a/clang-tools-extra/test/clang-doc/namespace.cpp b/clang-tools-extra/test/clang-doc/namespace.cpp index 28f9556b86218..adf7ab7d946ab 100644 --- a/clang-tools-extra/test/clang-doc/namespace.cpp +++ b/clang-tools-extra/test/clang-doc/namespace.cpp @@ -58,7 +58,6 @@ // COM: FIXME: Add global functions to the namespace template // COM: FIXME: Add namespaces to the namespace template -// COM: FIXME: Add class definition location to class template // Anonymous Namespace namespace { @@ -70,7 +69,7 @@ void anonFunction() {} class AnonClass {}; // MD-ANON-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]* // HTML-ANON-CLASS-LINE:

    Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    -// MUSTACHE-ANON-CLASS-LINE-NOT:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    +// MUSTACHE-ANON-CLASS-LINE:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    // MD-ANON-CLASS: # class AnonClass // HTML-ANON-CLASS:

    class AnonClass

    @@ -117,7 +116,7 @@ void functionInPrimaryNamespace() {} class ClassInPrimaryNamespace {}; // MD-PRIMARY-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]* // HTML-PRIMARY-CLASS-LINE:

    Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    -// MUSTACHE-PRIMARY-CLASS-LINE-NOT:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    +// MUSTACHE-PRIMARY-CLASS-LINE:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    // MD-PRIMARY-CLASS: # class ClassInPrimaryNamespace // MD-PRIMARY-CLASS: Class in PrimaryNamespace @@ -139,7 +138,7 @@ void functionInNestedNamespace() {} class ClassInNestedNamespace {}; // MD-NESTED-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]* // HTML-NESTED-CLASS-LINE:

    Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    -// MUSTACHE-NESTED-CLASS-LINE-NOT:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    +// MUSTACHE-NESTED-CLASS-LINE:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    // MD-NESTED-CLASS: # class ClassInNestedNamespace // MD-NESTED-CLASS: Class in NestedNamespace @@ -233,7 +232,7 @@ void functionInAnotherNamespace() {} class ClassInAnotherNamespace {}; // MD-ANOTHER-CLASS-LINE: *Defined at {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]* // HTML-ANOTHER-CLASS-LINE:

    Defined at line [[@LINE-2]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    -// MUSTACHE-ANOTHER-CLASS-LINE-NOT:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    +// MUSTACHE-ANOTHER-CLASS-LINE:

    Defined at line [[@LINE-3]] of file {{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp

    // MD-ANOTHER-CLASS: # class ClassInAnotherNamespace // MD-ANOTHER-CLASS: Class in AnotherNamespace From 658675fad794197a2a41207b8e4b422becd78f28 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Mon, 24 Nov 2025 11:06:15 -0800 Subject: [PATCH 35/37] [OpenACC][CIR] 'device_resident' clause lowering for local declare (#169389) Just like the last handful of clauses, this is a pretty simple one, doing device_resident (Entry op: declare_device_resident, and exit: delete). This should be the last of the 'local' declare patches. --- clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp | 16 +- clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp | 12 ++ .../CodeGenOpenACC/declare-deviceresident.cpp | 199 ++++++++++++++++++ 3 files changed, 219 insertions(+), 8 deletions(-) create mode 100644 clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp index 9c1aeb87c8029..41a193e4d85c5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp @@ -19,12 +19,9 @@ using namespace clang::CIRGen; namespace { struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { - SourceRange declareRange; mlir::acc::DeclareEnterOp enterOp; - OpenACCDeclareCleanup(SourceRange declareRange, - mlir::acc::DeclareEnterOp enterOp) - : declareRange(declareRange), enterOp(enterOp) {} + OpenACCDeclareCleanup(mlir::acc::DeclareEnterOp enterOp) : enterOp(enterOp) {} template void createOutOp(CIRGenFunction &cgf, InTy inOp) { @@ -78,8 +75,11 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { createOutOp(cgf, create); break; } - } else if (auto create = val.getDefiningOp()) { - createOutOp(cgf, create); + } else if (auto present = val.getDefiningOp()) { + createOutOp(cgf, present); + } else if (auto dev_res = + val.getDefiningOp()) { + createOutOp(cgf, dev_res); } else if (val.getDefiningOp()) { // Link has no exit clauses, and shouldn't be copied. continue; @@ -87,7 +87,7 @@ struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup { // DevicePtr has no exit clauses, and shouldn't be copied. continue; } else { - cgf.cgm.errorNYI(declareRange, "OpenACC local declare clause cleanup"); + llvm_unreachable("OpenACC local declare clause unexpected defining op"); continue; } exitOp.getDataClauseOperandsMutable().append(val); @@ -106,7 +106,7 @@ void CIRGenFunction::emitOpenACCDeclare(const OpenACCDeclareDecl &d) { d.clauses()); ehStack.pushCleanup(CleanupKind::NormalCleanup, - d.getSourceRange(), enterOp); + enterOp); } void CIRGenFunction::emitOpenACCRoutine(const OpenACCRoutineDecl &d) { diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index a23ec93ab1d75..60a089fe0e936 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -1135,6 +1135,18 @@ class OpenACCClauseCIREmitter final llvm_unreachable("Unknown construct kind in VisitReductionClause"); } } + + void VisitDeviceResidentClause(const OpenACCDeviceResidentClause &clause) { + if constexpr (isOneOfTypes) { + for (const Expr *var : clause.getVarList()) + addDataOperand( + var, mlir::acc::DataClause::acc_declare_device_resident, {}, + /*structured=*/true, + /*implicit=*/false); + } else { + llvm_unreachable("Unknown construct kind in VisitDeviceResidentClause"); + } + } }; template diff --git a/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp new file mode 100644 index 0000000000000..dbec4f22a1bb3 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. + + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare device_resident(ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare device_resident(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare device_resident(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare device_resident(ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare device_resident(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_DEV_RES]], %[[LOC_INT_DEV_RES]], %[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_DEV_RES]], %[[ARG_INT_DEV_RES]], %[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_DEV_RES]] : !cir.ptr) {dataClause = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_DEV_RES]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, name = "ArgHSEPtr[1:1]"} +} + From 0549aa11c2c1b619c673a0644a25f939bf13746f Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Tue, 25 Nov 2025 04:15:08 +0900 Subject: [PATCH 36/37] [llvm][dsymutil] Use the DW_AT_name of the uniqued DIE for insertion into .debug_names (#168513) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depends on: * https://github.com/llvm/llvm-project/pull/168895 Note, the last commit is the one with the actual fix. The others are drive-by/test changes We've been seeing dsymutil verification failures like: ``` error: Name Index @ 0x0: Entry @ 0x11949d: mismatched Name of DIE @ 0x9c644c: index - apply<(lambda at /some/build/dir/lib/LLVMSupport/include/llvm/Support/Error.h:1070:35)>; debug_info - apply<(lambda at /some/build/dir/lib/LLVMCustom/include/llvm/Support/Error.h:1070:35)> apply, _ZN11custom_llvm18ErrorHandlerTraitsIRFvRNS_13ErrorInfoBaseEEE5applyIZNS_12consumeErrorENS_5ErrorEEUlRKS1_E_EES7_OT_NSt3__110unique_ptrIS1_NSD_14default_deleteIS1_EEEE. ``` Not how the name of the DIE has a different lambda path than the one that was used to insert the DIE into debug_names. The root cause of the issue is that we have a DW_AT_subprogram definition whose DW_AT_specification DIE got deduplicated. But the DW_AT_name of the original specification is different than the one it got uniqued to. That’s technically fine because dsymutil uniques by linkage name, which uniquely identifies any function with non-internal linkage. But we insert the definition DIE into the debug-names table using the DW_AT_name of the original specification (we call `getDIENames(InputDIE…)`). But what we really want to do is use the name of the adjusted `DW_AT_specifcation` (i.e., the `DW_AT_specification` of the output DIE). That’s not as simple as it sounds because we can’t just get ahold of the DIE in the output CU. We have to grab the ODR `DeclContext` of the input DIE’s specification. That is the only link back to the canonical specification DIE. For that to be of any use, we have to stash the `DW_AT_name` into `DeclContext` so we can use it in `getDIENames`. We have to account for the possibility of multiple levels of `DW_AT_specification`/`DW_AT_abstract_origin`. So my proposed solution is to recursively scan the referenced DIE’s, grab the canonical DIE for those and get the name from the `DeclContext` (if none exists then use the `DW_AT_name` of the DIE itself). One remaining question is whether we need to handle the case where a DIE has a `DW_AT_specification` *and* a `DW_AT_abstract_origin`? That complicates the way we locate `DW_AT_name`. We'd have to adjust `getCanonicalDIEName` to handle this. But it's not clear what a `DW_AT_name` would be for such cases. Worst case at the moment we take the wrong path up the specifications and don't find any `DW_AT_name`, and don't end up indexing that DIE. Something to keep an eye out for. rdar://149239553 --- .../llvm/DWARFLinker/Classic/DWARFLinker.h | 6 +- .../Classic/DWARFLinkerDeclContext.h | 14 ++-- llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp | 76 ++++++++++++++++-- .../Classic/DWARFLinkerDeclContext.cpp | 32 ++++---- .../AArch64/dummy-debug-map-arm64.map | 4 + .../AArch64/dwarf5-str-offsets-base-strx.test | 2 +- .../tools/dsymutil/AArch64/inlined-low_pc.c | 4 +- .../odr-uniquing-DW_AT_name-conflict.test | 28 +++++++ .../odr-uniquing-DW_AT_name-conflict/1.o | Bin 0 -> 2416 bytes .../odr-uniquing-DW_AT_name-conflict/2.o | Bin 0 -> 2416 bytes .../odr-uniquing-DW_AT_name-conflict/lib1.cpp | 5 ++ .../odr-uniquing-DW_AT_name-conflict/lib1.h | 3 + .../odr-uniquing-DW_AT_name-conflict/lib2.cpp | 5 ++ .../odr-uniquing-DW_AT_name-conflict/main.cpp | 6 ++ 14 files changed, 155 insertions(+), 30 deletions(-) create mode 100644 llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/2.o create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp create mode 100644 llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h index 5b9535380aebf..d6f5d926f022c 100644 --- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h @@ -708,7 +708,11 @@ class LLVM_ABI DWARFLinker : public DWARFLinkerBase { /// already there. /// \returns is a name was found. bool getDIENames(const DWARFDie &Die, AttributesInfo &Info, - OffsetsStringPool &StringPool, bool StripTemplate = false); + OffsetsStringPool &StringPool, const DWARFFile &File, + CompileUnit &Unit, bool StripTemplate = false); + + llvm::StringRef getCanonicalDIEName(DWARFDie Die, const DWARFFile &File, + CompileUnit *Unit); uint32_t hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U, const DWARFFile &File, diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h index 9fb1b3f80e2ff..5ced6d05cc231 100644 --- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h @@ -84,11 +84,13 @@ class DeclContext { DeclContext() : DefinedInClangModule(0), Parent(*this) {} DeclContext(unsigned Hash, uint32_t Line, uint32_t ByteSize, uint16_t Tag, - StringRef Name, StringRef File, const DeclContext &Parent, - DWARFDie LastSeenDIE = DWARFDie(), unsigned CUId = 0) + StringRef Name, StringRef NameForUniquing, StringRef File, + const DeclContext &Parent, DWARFDie LastSeenDIE = DWARFDie(), + unsigned CUId = 0) : QualifiedNameHash(Hash), Line(Line), ByteSize(ByteSize), Tag(Tag), - DefinedInClangModule(0), Name(Name), File(File), Parent(Parent), - LastSeenDIE(LastSeenDIE), LastSeenCompileUnitID(CUId) {} + DefinedInClangModule(0), Name(Name), NameForUniquing(NameForUniquing), + File(File), Parent(Parent), LastSeenDIE(LastSeenDIE), + LastSeenCompileUnitID(CUId) {} uint32_t getQualifiedNameHash() const { return QualifiedNameHash; } @@ -100,6 +102,7 @@ class DeclContext { uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; } void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; } + llvm::StringRef getCanonicalName() const { return Name; } bool isDefinedInClangModule() const { return DefinedInClangModule; } void setDefinedInClangModule(bool Val) { DefinedInClangModule = Val; } @@ -115,6 +118,7 @@ class DeclContext { uint16_t Tag = dwarf::DW_TAG_compile_unit; unsigned DefinedInClangModule : 1; StringRef Name; + StringRef NameForUniquing; StringRef File; const DeclContext &Parent; DWARFDie LastSeenDIE; @@ -180,7 +184,7 @@ struct DeclMapInfo : private DenseMapInfo { return RHS == LHS; return LHS->QualifiedNameHash == RHS->QualifiedNameHash && LHS->Line == RHS->Line && LHS->ByteSize == RHS->ByteSize && - LHS->Name.data() == RHS->Name.data() && + LHS->NameForUniquing.data() == RHS->NameForUniquing.data() && LHS->File.data() == RHS->File.data() && LHS->Parent.QualifiedNameHash == RHS->Parent.QualifiedNameHash; } diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 8637b55c78f9c..daf3788639451 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -151,22 +151,84 @@ static bool isTypeTag(uint16_t Tag) { return false; } -bool DWARFLinker::DIECloner::getDIENames(const DWARFDie &Die, - AttributesInfo &Info, - OffsetsStringPool &StringPool, - bool StripTemplate) { +/// Recurse through the input DIE's canonical references until we find a +/// DW_AT_name. +llvm::StringRef +DWARFLinker::DIECloner::getCanonicalDIEName(DWARFDie Die, const DWARFFile &File, + CompileUnit *Unit) { + if (!Die) + return {}; + + std::optional Ref; + + auto GetDieName = [](const DWARFDie &D) -> llvm::StringRef { + auto NameForm = D.find(llvm::dwarf::DW_AT_name); + if (!NameForm) + return {}; + + auto NameOrErr = NameForm->getAsCString(); + if (!NameOrErr) { + llvm::consumeError(NameOrErr.takeError()); + return {}; + } + + return *NameOrErr; + }; + + llvm::StringRef Name = GetDieName(Die); + if (!Name.empty()) + return Name; + + while (true) { + if (!(Ref = Die.find(llvm::dwarf::DW_AT_specification)) && + !(Ref = Die.find(llvm::dwarf::DW_AT_abstract_origin))) + break; + + Die = Linker.resolveDIEReference(File, CompileUnits, *Ref, Die, Unit); + if (!Die) + break; + + assert(Unit); + + unsigned SpecIdx = Unit->getOrigUnit().getDIEIndex(Die); + CompileUnit::DIEInfo &SpecInfo = Unit->getInfo(SpecIdx); + if (SpecInfo.Ctxt && SpecInfo.Ctxt->hasCanonicalDIE()) { + if (!SpecInfo.Ctxt->getCanonicalName().empty()) { + Name = SpecInfo.Ctxt->getCanonicalName(); + break; + } + } + + Name = GetDieName(Die); + if (!Name.empty()) + break; + } + + return Name; +} + +bool DWARFLinker::DIECloner::getDIENames( + const DWARFDie &Die, AttributesInfo &Info, OffsetsStringPool &StringPool, + const DWARFFile &File, CompileUnit &Unit, bool StripTemplate) { // This function will be called on DIEs having low_pcs and // ranges. As getting the name might be more expansive, filter out // blocks directly. if (Die.getTag() == dwarf::DW_TAG_lexical_block) return false; + // The mangled name of an specification DIE will by virtue of the + // uniquing algorithm be the same as the one it got uniqued into. + // So just use the input DIE's linkage name. if (!Info.MangledName) if (const char *MangledName = Die.getLinkageName()) Info.MangledName = StringPool.getEntry(MangledName); + // For subprograms with linkage names, we unique on the linkage name, + // so DW_AT_name's may differ between the input and canonical DIEs. + // Use the name of the canonical DIE. if (!Info.Name) - if (const char *Name = Die.getShortName()) + if (llvm::StringRef Name = getCanonicalDIEName(Die, File, &Unit); + !Name.empty()) Info.Name = StringPool.getEntry(Name); if (!Info.MangledName) @@ -1939,7 +2001,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, // accelerator tables too. For now stick with dsymutil's behavior. if ((Info.InDebugMap || AttrInfo.HasLowPc || AttrInfo.HasRanges) && Tag != dwarf::DW_TAG_compile_unit && - getDIENames(InputDIE, AttrInfo, DebugStrPool, + getDIENames(InputDIE, AttrInfo, DebugStrPool, File, Unit, Tag != dwarf::DW_TAG_inlined_subroutine)) { if (AttrInfo.MangledName && AttrInfo.MangledName != AttrInfo.Name) Unit.addNameAccelerator(Die, AttrInfo.MangledName, @@ -1962,7 +2024,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, } else if (Tag == dwarf::DW_TAG_imported_declaration && AttrInfo.Name) { Unit.addNamespaceAccelerator(Die, AttrInfo.Name); } else if (isTypeTag(Tag) && !AttrInfo.IsDeclaration) { - bool Success = getDIENames(InputDIE, AttrInfo, DebugStrPool); + bool Success = getDIENames(InputDIE, AttrInfo, DebugStrPool, File, Unit); uint64_t RuntimeLang = dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_runtime_class)) .value_or(0); diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp index c9c8dddce9c44..66a1ba9c6711f 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp @@ -84,24 +84,26 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, break; } - StringRef NameRef; + StringRef Name = DIE.getShortName(); + StringRef NameForUniquing; StringRef FileRef; if (const char *LinkageName = DIE.getLinkageName()) - NameRef = StringPool.internString(LinkageName); - else if (const char *ShortName = DIE.getShortName()) - NameRef = StringPool.internString(ShortName); + NameForUniquing = StringPool.internString(LinkageName); + else if (!Name.empty()) + NameForUniquing = StringPool.internString(Name); - bool IsAnonymousNamespace = NameRef.empty() && Tag == dwarf::DW_TAG_namespace; + bool IsAnonymousNamespace = + NameForUniquing.empty() && Tag == dwarf::DW_TAG_namespace; if (IsAnonymousNamespace) { // FIXME: For dsymutil-classic compatibility. I think uniquing within // anonymous namespaces is wrong. There is no ODR guarantee there. - NameRef = "(anonymous namespace)"; + NameForUniquing = "(anonymous namespace)"; } if (Tag != dwarf::DW_TAG_class_type && Tag != dwarf::DW_TAG_structure_type && Tag != dwarf::DW_TAG_union_type && - Tag != dwarf::DW_TAG_enumeration_type && NameRef.empty()) + Tag != dwarf::DW_TAG_enumeration_type && NameForUniquing.empty()) return PointerIntPair(nullptr); unsigned Line = 0; @@ -140,10 +142,10 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, } } - if (!Line && NameRef.empty()) + if (!Line && NameForUniquing.empty()) return PointerIntPair(nullptr); - // We hash NameRef, which is the mangled name, in order to get most + // We hash NameForUniquing, which is the mangled name, in order to get most // overloaded functions resolve correctly. // // Strictly speaking, hashing the Tag is only necessary for a @@ -153,7 +155,8 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, // FIXME: dsymutil-classic won't unique the same type presented // once as a struct and once as a class. Using the Tag in the fully // qualified name hash to get the same effect. - unsigned Hash = hash_combine(Context.getQualifiedNameHash(), Tag, NameRef); + unsigned Hash = + hash_combine(Context.getQualifiedNameHash(), Tag, NameForUniquing); // FIXME: dsymutil-classic compatibility: when we don't have a name, // use the filename. @@ -161,15 +164,16 @@ DeclContextTree::getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, Hash = hash_combine(Hash, FileRef); // Now look if this context already exists. - DeclContext Key(Hash, Line, ByteSize, Tag, NameRef, FileRef, Context); + DeclContext Key(Hash, Line, ByteSize, Tag, Name, NameForUniquing, FileRef, + Context); auto ContextIter = Contexts.find(&Key); if (ContextIter == Contexts.end()) { // The context wasn't found. bool Inserted; - DeclContext *NewContext = - new (Allocator) DeclContext(Hash, Line, ByteSize, Tag, NameRef, FileRef, - Context, DIE, U.getUniqueID()); + DeclContext *NewContext = new (Allocator) + DeclContext(Hash, Line, ByteSize, Tag, Name, NameForUniquing, FileRef, + Context, DIE, U.getUniqueID()); std::tie(ContextIter, Inserted) = Contexts.insert(NewContext); assert(Inserted && "Failed to insert DeclContext"); (void)Inserted; diff --git a/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map b/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map index 50d860290422c..bd2b2014ee22c 100644 --- a/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map +++ b/llvm/test/tools/dsymutil/AArch64/dummy-debug-map-arm64.map @@ -11,9 +11,13 @@ objects: - filename: 1.o symbols: - { sym: _bar, objAddr: 0x0, binAddr: 0x10000, size: 0x10 } + - { sym: __Z13lib1_internalv, objAddr: 0x0, binAddr: 0x10020, size: 0x20 } + - { sym: __ZN3Foo4funcIZ13lib1_internalvE3$_0EEvv, objAddr: 0x0, binAddr: 0x10040, size: 0x20 } - filename: 2.o symbols: - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x20000, size: 0x10 } + - { sym: __Z13lib1_internalv, objAddr: 0x0, binAddr: 0x20020, size: 0x20 } + - { sym: __ZN3Foo4funcIZ13lib1_internalvE3$_0EEvv, objAddr: 0x0, binAddr: 0x20040, size: 0x20 } - filename: 3.o symbols: - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x30000, size: 0x10 } diff --git a/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test b/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test index c0c4fe835682f..c5110a873c603 100644 --- a/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test +++ b/llvm/test/tools/dsymutil/AArch64/dwarf5-str-offsets-base-strx.test @@ -98,7 +98,7 @@ CHECK: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x000000 CHECK: DW_AT_comp_dir [DW_FORM_strx] (indexed (00000004) string = "/Users/shubham/Development/test109275485") CHECK: DW_TAG_subprogram -CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000010000) +CHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000010040) CHECK: DW_AT_linkage_name [DW_FORM_strx] (indexed (00000005) string = "_Z4foo2i") CHECK: DW_AT_name [DW_FORM_strx] (indexed (00000006) string = "foo2") diff --git a/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c b/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c index d2d36f675e8b7..b89a6f99ebcb3 100644 --- a/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c +++ b/llvm/test/tools/dsymutil/AArch64/inlined-low_pc.c @@ -10,10 +10,10 @@ int bar(int a) { return foo(a); } // RUN: llvm-dwarfdump - | FileCheck %s // CHECK: DW_TAG_subprogram -// CHECK: DW_AT_low_pc{{.*}}0x0000000000010000 +// CHECK: DW_AT_low_pc{{.*}}0x0000000000010040 // CHECK: DW_AT_name{{.*}}"bar" // CHECK-NOT: NULL // CHECK: DW_TAG_inlined_subroutine // CHECK-NEXT: DW_AT_abstract_origin{{.*}}"foo" -// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000010000 +// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000010040 diff --git a/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test b/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test new file mode 100644 index 0000000000000..b6edb8bca3194 --- /dev/null +++ b/llvm/test/tools/dsymutil/AArch64/odr-uniquing-DW_AT_name-conflict.test @@ -0,0 +1,28 @@ +# Tests the case where a DW_TAG_subprogram for a method declaration +# got uniqued into a DW_TAG_subprogram with the same linkage name (but +# different DW_AT_name). Make sure the DW_TAG_subprogram DIE for the +# definition, which previously pointed to the now de-deduplicated declaration, +# gets inserted into the .debug_names table using the DW_AT_name of the canonical +# declaration DW_TAG_subprogram. +# +# Object files compiled as follows: +# clang -g -c -o 1.o Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp +# clang -g -c -o 2.o Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp + +# RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -o - \ +# RUN: | llvm-dwarfdump --verify - | FileCheck %s + +# RUN: dsymutil --linker parallel -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -o - \ +# RUN: | not llvm-dwarfdump --verify - | FileCheck %s --check-prefix=PARALLEL-ODR + +# RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -no-odr -o - \ +# RUN: | llvm-dwarfdump --verify - | FileCheck %s + +# RUN: dsymutil --linker parallel -f -oso-prepend-path=%p/../Inputs/odr-uniquing-DW_AT_name-conflict -y %p/dummy-debug-map-arm64.map -no-odr -o - \ +# RUN: | llvm-dwarfdump --verify - | FileCheck %s + +# CHECK: No errors. + +# FIXME: parallel DWARFLinker uses wrong DW_AT_name when inserting uniqued subprogram into .debug_names +# PARALLEL-ODR: Verifying .debug_names... +# PARALLEL-ODR-NEXT: error: Name Index {{.*}} mismatched Name of DIE diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/1.o new file mode 100644 index 0000000000000000000000000000000000000000..5932a3c89aaeb1b87292f3466bd5550f4c685637 GIT binary patch literal 2416 zcma)7O>Ep$5T0kRH@oZ7G~I|K1c57I(SqXrNt9NhGzr^@MP#IT^Yi9? zp7&%RTAAL^mK zFul02Wk*_;D}*PYzLwz0#o$~NLsHi*yVy9T8^vPXZjxPidGdv$t&j-k689Y9`LzN< z_?8*rm7S7H&UUe?K^C)Fxl0=j` zF|F-vOLXpl?ykVb<}$moIR&eiyI=#Ce6+bdc?JEe--`N=cc)Q`NL2 zsu#|vzn@j>eS7)_)t%9S$axtfwwHjmysB9WO0(hB4 z)3xlpX4Gn#DHr1%-^rd@Gsc0Ds+G5nHSuX>r|hI!)TqE_RMdty!&W~1r+s5YX_ z@ULo~^(KVNI{ld!J={X3S~iyqyHaeJOPPXu+NrpP|C04g{UJ*D{1g}unZo!7*43_Dy%hZ2~IGr)r1V>k-II}-Y7 zu0=|ybzFPy2Wng#w!JmvdJmb7V;&_o{y)1PAc09z(x^Ndte`wZ)VrlegvU{bFMAt9 z&s@AXI{`mjeeU+dSLr<1wP|C0YPfLzm0ia^yk7it9Fs}5KG-WIQ{PBTUWqXYzLwOK zd_hRy7k1Pr+E#Z5MO_N^gBcFdzEH-gHY61;Q3STZWdY@G42XzSvK8()LNEp$5T3VQZ}z7&O*bM5LEuWYXhE@eH&I$bX%ez&2}x;01B5?WUfbKO@Y>7T zy9EO3Pc9&Fi9`=Q6@-we)I$y+B5^23dZ-{5ggBKGa6mv${4|sq`*}llL1d(P^Yi9? zp7&@M^Xr)d&N(|qeyKh#5e zVR~_4%Z{`hR|qeL`dX4F7l(6E3`tEhty1H(rk6@}t4Vh8*~zDlwL&7COWbpq=hq4d z;ag^eS8>WNIot8B^T^k^+}I(W=hrp2T=v@Wrh*d(ILz^8MXqcK+J1|o{17kjwc{;w zyfT+7JH*pXGXQMI`;LFTpJ(u6B2?C~9(?Lw^KK)hIHsn}75UOYtnr?l;Ldf#Bi9%Ilb>C2ELzf@-!B~RJjeTuniuJtsQ)_h z3;U1oG%9ji|GmQd<4_0#uOUtwZQFhUC+V!i^4)_M5gCbs$bS>p6Qq;^m`9mGCP_r8 z6VuwxwnXO+=*|jkY%a4an^Um*UKecOl6Nab{`py(=-0XtiQ3f{aVX~YR z1^L1o^3P}Gdf%SDLAhseZ^G-tq!mW$M86{o@>u4P%s4(HLJUSTnGBhjW|oa+1UWKA zg+-x92F!i<@q`L9SwY4}L_}o-*C)GC0Z4_|-=7_r=r^;+(UTS-)-T9m6yRF`BCz1N z8SDcV6QZM!1HB4l#X!Q5Ra0pY1d-vX#YK@ZCUzxRERn~#MAu0iF`5dPVta68u8HHL z$m!T>GDDA97#aq_2s)x$H;($_$e$qJK+a-q`j7{ZCy}XEw@~P0*J`#U8Mf}6mYP=G ztGJF-$SXM|Cmjg9Lxp?}EhV4NDWeCa7apoOzEyX0d&u=(0=rTwC`PRYDgjjt-A4dV zT{K+NDkyrbrcSvS@A#ftw3?Rf)~vcZZ|i>9tyewugl?QXwW!p%;~z1ninGylVN@AW zX82bX&wLreWu1QYX%DxcRx8GmZrP=Vv7{E=Gmh=*rl-~|vtd{!rJUEIO@Q`%et4Uw z3CCO)7I5OpVbn&-4ab=22$~+=ujQtvn@ynkBL{4~S~7J>_oe7W#zw{p!v~Lozf|#{ z7f)LZijNAC6pX=e6-O!@)g>abiF=T#W9H_bIgvY*qXudb#nZi`JNM$H%fg4Zz zS|o*9$F=8vpvJ{v+gn4fH<9T$=22qf|GE1v60{>^RPGN}P#yy6-Qs<*2T+HPdmBTK zUA#Cu0bgBv;`Y7Q=seiU5J4leQf*HS{`VmgChDH*Q8+8 literal 0 HcmV?d00001 diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp new file mode 100644 index 0000000000000..4cf90f071ee8c --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.cpp @@ -0,0 +1,5 @@ +#include "lib1.h" + +[[gnu::weak]] void lib1_internal() { + Foo{}.func(); +} diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h new file mode 100644 index 0000000000000..3b3cefbaeac17 --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib1.h @@ -0,0 +1,3 @@ +struct Foo { + template void func() {} +}; diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp new file mode 100644 index 0000000000000..4cf90f071ee8c --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/lib2.cpp @@ -0,0 +1,5 @@ +#include "lib1.h" + +[[gnu::weak]] void lib1_internal() { + Foo{}.func(); +} diff --git a/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp new file mode 100644 index 0000000000000..77f2cc4c8affe --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/odr-uniquing-DW_AT_name-conflict/main.cpp @@ -0,0 +1,6 @@ +[[gnu::weak]] void lib1_internal(); + +int main() { + lib1_internal(); + __builtin_debugtrap(); +} From e92bb83c1810c61a7fa81d55a1690cffa2b14b60 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Mon, 24 Nov 2025 22:18:51 +0300 Subject: [PATCH 37/37] [AArch64][PAC] Simplify emission of authenticated pointer check (NFC) (#160899) The `AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue` method accepts two arguments, `bool ShouldTrap` and `const MCSymbol *OnFailure`, that control the behavior of the emitted instruction sequence when the check fails: * `ShouldTrap` requests an error to be generated * `OnFailure` requests branching to the given label after clearing the PAC field An assertion in `emitPtrauthCheckAuthenticatedValue` ensures that when `ShouldTrap` is true, `OnFailure` must be null. But the opposite holds as well: when `ShouldTrap` is false, `OnFailure` is always non-null, as otherwise the entire sequence following `AUT[ID][AB]` instruction would turn into a very expensive equivalent of XPAC (unless the CPU implements FEAT_FPAC): authenticate Xn inspect PAC field of Xn if PAC field was not cleared: clear PAC field In other words, the value of `ShouldTrap` argument can be computed as `OnFailure == nullptr` at all existing call sites. In fact, at three of four call sites, constant `true` and `nullptr` are passed as the values of these function arguments. `emitPtrauthAuthResign` is the only caller that potentially makes use of checking-but-not-trapping mode of `emitPtrauthCheckAuthenticatedValue`, and it passes a non-null pointer as `OnFailure` when `ShouldTrap` is false. This commit makes the invariant explicit by omitting the `ShouldTrap` argument and inferring its value from the `OnFailure` argument instead. --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 54 +++++++++---------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 73d9699f71477..5da6181ba36dd 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -162,8 +162,7 @@ class AArch64AsmPrinter : public AsmPrinter { Register ScratchReg, AArch64PACKey::ID Key, AArch64PAuth::AuthCheckMethod Method, - bool ShouldTrap, - const MCSymbol *OnFailure); + const MCSymbol *OnFailure = nullptr); // Check authenticated LR before tail calling. void emitPtrauthTailCallHardening(const MachineInstr *TC); @@ -1937,14 +1936,19 @@ Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, return ScratchReg; } -/// Emits a code sequence to check an authenticated pointer value. +/// Emit a code sequence to check an authenticated pointer value. /// -/// If OnFailure argument is passed, jump there on check failure instead -/// of proceeding to the next instruction (only if ShouldTrap is false). +/// This function emits a sequence of instructions that checks if TestedReg was +/// authenticated successfully. On success, execution continues at the next +/// instruction after the sequence. +/// +/// The action performed on failure depends on the OnFailure argument: +/// * if OnFailure is not nullptr, control is transferred to that label after +/// clearing the PAC field +/// * otherwise, BRK instruction is emitted to generate an error void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( Register TestedReg, Register ScratchReg, AArch64PACKey::ID Key, - AArch64PAuth::AuthCheckMethod Method, bool ShouldTrap, - const MCSymbol *OnFailure) { + AArch64PAuth::AuthCheckMethod Method, const MCSymbol *OnFailure) { // Insert a sequence to check if authentication of TestedReg succeeded, // such as: // @@ -1981,7 +1985,7 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( .addReg(getWRegFromXReg(ScratchReg)) .addReg(TestedReg) .addImm(0)); - assert(ShouldTrap && !OnFailure && "DummyLoad always traps on error"); + assert(!OnFailure && "DummyLoad always traps on error"); return; } @@ -2035,15 +2039,14 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( llvm_unreachable("Unsupported check method"); } - if (ShouldTrap) { - assert(!OnFailure && "Cannot specify OnFailure with ShouldTrap"); + if (!OnFailure) { // Trapping sequences do a 'brk'. // brk #<0xc470 + aut key> EmitToStreamer(MCInstBuilder(AArch64::BRK).addImm(0xc470 | Key)); } else { // Non-trapping checked sequences return the stripped result in TestedReg, - // skipping over success-only code (such as re-signing the pointer) if - // there is one. + // skipping over success-only code (such as re-signing the pointer) by + // jumping to OnFailure label. // Note that this can introduce an authentication oracle (such as based on // the high bits of the re-signed value). @@ -2068,12 +2071,9 @@ void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( MCInstBuilder(XPACOpc).addReg(TestedReg).addReg(TestedReg)); } - if (OnFailure) { - // b Lend - EmitToStreamer( - MCInstBuilder(AArch64::B) - .addExpr(MCSymbolRefExpr::create(OnFailure, OutContext))); - } + // b Lend + const auto *OnFailureExpr = MCSymbolRefExpr::create(OnFailure, OutContext); + EmitToStreamer(MCInstBuilder(AArch64::B).addExpr(OnFailureExpr)); } // If the auth check succeeds, we can continue. @@ -2100,9 +2100,8 @@ void AArch64AsmPrinter::emitPtrauthTailCallHardening(const MachineInstr *TC) { "Neither x16 nor x17 is available as a scratch register"); AArch64PACKey::ID Key = AArch64FI->shouldSignWithBKey() ? AArch64PACKey::IB : AArch64PACKey::IA; - emitPtrauthCheckAuthenticatedValue( - AArch64::LR, ScratchReg, Key, LRCheckMethod, - /*ShouldTrap=*/true, /*OnFailure=*/nullptr); + emitPtrauthCheckAuthenticatedValue(AArch64::LR, ScratchReg, Key, + LRCheckMethod); } void AArch64AsmPrinter::emitPtrauthAuthResign( @@ -2176,9 +2175,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign( if (IsAUTPAC && !ShouldTrap) EndSym = createTempSymbol("resign_end_"); - emitPtrauthCheckAuthenticatedValue(AUTVal, Scratch, AUTKey, - AArch64PAuth::AuthCheckMethod::XPAC, - ShouldTrap, EndSym); + emitPtrauthCheckAuthenticatedValue( + AUTVal, Scratch, AUTKey, AArch64PAuth::AuthCheckMethod::XPAC, EndSym); } // We already emitted unchecked and checked-but-non-trapping AUTs. @@ -2517,9 +2515,7 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) { : AArch64PACKey::DA); emitPtrauthCheckAuthenticatedValue(AArch64::X16, AArch64::X17, AuthKey, - AArch64PAuth::AuthCheckMethod::XPAC, - /*ShouldTrap=*/true, - /*OnFailure=*/nullptr); + AArch64PAuth::AuthCheckMethod::XPAC); } } else { EmitToStreamer(MCInstBuilder(AArch64::LDRXui) @@ -2652,9 +2648,7 @@ void AArch64AsmPrinter::LowerLOADgotAUTH(const MachineInstr &MI) { (AuthOpcode == AArch64::AUTIA ? AArch64PACKey::IA : AArch64PACKey::DA); emitPtrauthCheckAuthenticatedValue(AuthResultReg, AArch64::X17, AuthKey, - AArch64PAuth::AuthCheckMethod::XPAC, - /*ShouldTrap=*/true, - /*OnFailure=*/nullptr); + AArch64PAuth::AuthCheckMethod::XPAC); emitMovXReg(DstReg, AuthResultReg); }