From 0fd330dfe3d0504f4143aea58e88d52e62bf7da7 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 24 Oct 2025 13:55:03 -0700 Subject: [PATCH 01/19] [NFC][DirectX] Refactor `DXILPrepare`/`DXILTranslateMetadata` (#164285) This pr updates `DXILPrepare` and `DXILTranslateMetadata` by moving all the removal of metadata from `DXILPrepare` to `DXILTranslateMetadata` to have a more consistent definition of what each pass is doing. It restricts the `DXILPrepare` to only update function attributes and insert bitcasts, and moves the removal of metadata to `DXILTranslateMetadata` so that all manipulation of metadata is done in a single pass. --- llvm/docs/DirectX/DXILArchitecture.rst | 7 +- llvm/lib/Target/DirectX/DXILPrepare.cpp | 89 +++--------- .../Target/DirectX/DXILTranslateMetadata.cpp | 130 +++++++++++++----- .../Target/DirectX/DXILTranslateMetadata.h | 3 +- .../CodeGen/DirectX/legalize-module-flags.ll | 2 +- .../CodeGen/DirectX/legalize-module-flags2.ll | 2 +- llvm/test/CodeGen/DirectX/llc-pipeline.ll | 2 +- .../CodeGen/DirectX/metadata-stripping.ll | 2 +- .../CodeGen/DirectX/strip-llvm-errno-tbaa.ll | 5 +- .../CodeGen/DirectX/strip-rootsignatures.ll | 5 +- 10 files changed, 126 insertions(+), 121 deletions(-) diff --git a/llvm/docs/DirectX/DXILArchitecture.rst b/llvm/docs/DirectX/DXILArchitecture.rst index 32b1e72deae7c..bce7fdaa386ed 100644 --- a/llvm/docs/DirectX/DXILArchitecture.rst +++ b/llvm/docs/DirectX/DXILArchitecture.rst @@ -118,9 +118,10 @@ The passes to generate DXIL IR follow the flow: Each of these passes has a defined responsibility: #. DXILOpLowering translates LLVM intrinsic calls to dx.op calls. -#. DXILPrepare transforms the DXIL IR to be compatible with LLVM 3.7, and - inserts bitcasts to allow typed pointers to be inserted. -#. DXILTranslateMetadata emits the DXIL Metadata structures. +#. DXILPrepare updates functions in the DXIL IR to be compatible with LLVM 3.7, + namely removing attributes, and inserting bitcasts to allow typed pointers + to be inserted. +#. DXILTranslateMetadata transforms and emits all recognized DXIL Metadata. The passes to encode DXIL to binary in the DX Container follow the flow: diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index 42e90f0e27517..d6fa65fe54b50 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// /// -/// \file This file contains pases and utilities to convert a modern LLVM +/// \file This file contains passes and utilities to convert a modern LLVM /// module into a module compatible with the LLVM 3.7-based DirectX Intermediate /// Language (DXIL). //===----------------------------------------------------------------------===// @@ -16,7 +16,6 @@ #include "DirectX.h" #include "DirectXIRPasses/PointerTypeAnalysis.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" #include "llvm/Analysis/DXILResource.h" @@ -27,7 +26,6 @@ #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/VersionTuple.h" #define DEBUG_TYPE "dxil-prepare" @@ -116,31 +114,6 @@ static void removeStringFunctionAttributes(Function &F, F.removeRetAttrs(DeadAttrs); } -static void cleanModuleFlags(Module &M) { - NamedMDNode *MDFlags = M.getModuleFlagsMetadata(); - if (!MDFlags) - return; - - SmallVector FlagEntries; - M.getModuleFlagsMetadata(FlagEntries); - bool Updated = false; - for (auto &Flag : FlagEntries) { - // llvm 3.7 only supports behavior up to AppendUnique. - if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique) - continue; - Flag.Behavior = Module::ModFlagBehavior::Warning; - Updated = true; - } - - if (!Updated) - return; - - MDFlags->eraseFromParent(); - - for (auto &Flag : FlagEntries) - M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val); -} - class DXILPrepareModule : public ModulePass { static Value *maybeGenerateBitcast(IRBuilder<> &Builder, @@ -202,15 +175,6 @@ class DXILPrepareModule : public ModulePass { Builder.getPtrTy(PtrTy->getAddressSpace()))); } - static std::array getCompatibleInstructionMDs(llvm::Module &M) { - return {M.getMDKindID("dx.nonuniform"), - M.getMDKindID("dx.controlflow.hints"), - M.getMDKindID("dx.precise"), - llvm::LLVMContext::MD_range, - llvm::LLVMContext::MD_alias_scope, - llvm::LLVMContext::MD_noalias}; - } - public: bool runOnModule(Module &M) override { PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M); @@ -224,10 +188,7 @@ class DXILPrepareModule : public ModulePass { const dxil::ModuleMetadataInfo MetadataInfo = getAnalysis().getModuleMetadata(); VersionTuple ValVer = MetadataInfo.ValidatorVersion; - bool SkipValidation = ValVer.getMajor() == 0 && ValVer.getMinor() == 0; - - // construct allowlist of valid metadata node kinds - std::array DXILCompatibleMDs = getCompatibleInstructionMDs(M); + bool AllowExperimental = ValVer.getMajor() == 0 && ValVer.getMinor() == 0; for (auto &F : M.functions()) { F.removeFnAttrs(AttrMask); @@ -235,7 +196,7 @@ class DXILPrepareModule : public ModulePass { // Only remove string attributes if we are not skipping validation. // This will reserve the experimental attributes when validation version // is 0.0 for experiment mode. - removeStringFunctionAttributes(F, SkipValidation); + removeStringFunctionAttributes(F, AllowExperimental); for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx) F.removeParamAttrs(Idx, AttrMask); @@ -243,11 +204,17 @@ class DXILPrepareModule : public ModulePass { IRBuilder<> Builder(&BB); for (auto &I : make_early_inc_range(BB)) { - I.dropUnknownNonDebugMetadata(DXILCompatibleMDs); + if (auto *CB = dyn_cast(&I)) { + CB->removeFnAttrs(AttrMask); + CB->removeRetAttrs(AttrMask); + for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx) + CB->removeParamAttrs(Idx, AttrMask); + continue; + } // Emtting NoOp bitcast instructions allows the ValueEnumerator to be // unmodified as it reserves instruction IDs during contruction. - if (auto LI = dyn_cast(&I)) { + if (auto *LI = dyn_cast(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, LI->getPointerOperand(), LI->getType())) { @@ -257,7 +224,7 @@ class DXILPrepareModule : public ModulePass { } continue; } - if (auto SI = dyn_cast(&I)) { + if (auto *SI = dyn_cast(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, SI->getPointerOperand(), SI->getValueOperand()->getType())) { @@ -268,39 +235,16 @@ class DXILPrepareModule : public ModulePass { } continue; } - if (auto GEP = dyn_cast(&I)) { + if (auto *GEP = dyn_cast(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, GEP->getPointerOperand(), GEP->getSourceElementType())) GEP->setOperand(0, NoOpBitcast); continue; } - if (auto *CB = dyn_cast(&I)) { - CB->removeFnAttrs(AttrMask); - CB->removeRetAttrs(AttrMask); - for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx) - CB->removeParamAttrs(Idx, AttrMask); - continue; - } } } } - // Remove flags not for DXIL. - cleanModuleFlags(M); - - // dx.rootsignatures will have been parsed from its metadata form as its - // binary form as part of the RootSignatureAnalysisWrapper, so safely - // remove it as it is not recognized in DXIL - if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) - RootSignature->eraseFromParent(); - - // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and - // causes all tests using the DXIL Validator to fail. - // - // This is a temporary fix and should be replaced with a whitelist once - // we have determined all metadata that the DXIL Validator allows - if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) - ErrNo->eraseFromParent(); return true; } @@ -308,11 +252,11 @@ class DXILPrepareModule : public ModulePass { DXILPrepareModule() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } static char ID; // Pass identification. }; @@ -323,7 +267,6 @@ char DXILPrepareModule::ID = 0; INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) -INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper) INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index 9eebcc9b13063..1e4797bbd05aa 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "DXILTranslateMetadata.h" +#include "DXILRootSignature.h" #include "DXILShaderFlags.h" #include "DirectX.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" @@ -204,9 +206,9 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, return MDNode::get(Ctx, MDVals); } -MDTuple *constructEntryMetadata(const Function *EntryFn, MDTuple *Signatures, - MDNode *Resources, MDTuple *Properties, - LLVMContext &Ctx) { +static MDTuple *constructEntryMetadata(const Function *EntryFn, + MDTuple *Signatures, MDNode *Resources, + MDTuple *Properties, LLVMContext &Ctx) { // Each entry point metadata record specifies: // * reference to the entry point function global symbol // * unmangled name @@ -290,42 +292,82 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD, return constructEntryMetadata(nullptr, nullptr, RMD, Properties, Ctx); } -// TODO: We might need to refactor this to be more generic, -// in case we need more metadata to be replaced. -static void translateBranchMetadata(Module &M) { - for (Function &F : M) { - for (BasicBlock &BB : F) { - Instruction *BBTerminatorInst = BB.getTerminator(); +static void translateBranchMetadata(Module &M, Instruction *BBTerminatorInst) { + MDNode *HlslControlFlowMD = + BBTerminatorInst->getMetadata("hlsl.controlflow.hint"); + + if (!HlslControlFlowMD) + return; - MDNode *HlslControlFlowMD = - BBTerminatorInst->getMetadata("hlsl.controlflow.hint"); + assert(HlslControlFlowMD->getNumOperands() == 2 && + "invalid operands for hlsl.controlflow.hint"); - if (!HlslControlFlowMD) - continue; + MDBuilder MDHelper(M.getContext()); - assert(HlslControlFlowMD->getNumOperands() == 2 && - "invalid operands for hlsl.controlflow.hint"); + llvm::Metadata *HintsStr = MDHelper.createString("dx.controlflow.hints"); + llvm::Metadata *HintsValue = MDHelper.createConstant( + mdconst::extract(HlslControlFlowMD->getOperand(1))); - MDBuilder MDHelper(M.getContext()); - ConstantInt *Op1 = - mdconst::extract(HlslControlFlowMD->getOperand(1)); + MDNode *MDNode = llvm::MDNode::get(M.getContext(), {HintsStr, HintsValue}); - SmallVector Vals( - ArrayRef{MDHelper.createString("dx.controlflow.hints"), - MDHelper.createConstant(Op1)}); + BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode); + BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr); +} + +static std::array getCompatibleInstructionMDs(llvm::Module &M) { + return { + M.getMDKindID("dx.nonuniform"), M.getMDKindID("dx.controlflow.hints"), + M.getMDKindID("dx.precise"), llvm::LLVMContext::MD_range, + llvm::LLVMContext::MD_alias_scope, llvm::LLVMContext::MD_noalias}; +} - MDNode *MDNode = llvm::MDNode::get(M.getContext(), Vals); +static void translateInstructionMetadata(Module &M) { + // construct allowlist of valid metadata node kinds + std::array DXILCompatibleMDs = getCompatibleInstructionMDs(M); - BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode); - BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr); + for (Function &F : M) { + for (BasicBlock &BB : F) { + // This needs to be done first so that "hlsl.controlflow.hints" isn't + // removed in the whitelist below + if (auto *I = BB.getTerminator()) + translateBranchMetadata(M, I); + + for (auto &I : make_early_inc_range(BB)) { + I.dropUnknownNonDebugMetadata(DXILCompatibleMDs); + } } } } -static void translateMetadata(Module &M, DXILResourceMap &DRM, - DXILResourceTypeMap &DRTM, - const ModuleShaderFlags &ShaderFlags, - const ModuleMetadataInfo &MMDI) { +static void cleanModuleFlags(Module &M) { + NamedMDNode *MDFlags = M.getModuleFlagsMetadata(); + if (!MDFlags) + return; + + SmallVector FlagEntries; + M.getModuleFlagsMetadata(FlagEntries); + bool Updated = false; + for (auto &Flag : FlagEntries) { + // llvm 3.7 only supports behavior up to AppendUnique. + if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique) + continue; + Flag.Behavior = Module::ModFlagBehavior::Warning; + Updated = true; + } + + if (!Updated) + return; + + MDFlags->eraseFromParent(); + + for (auto &Flag : FlagEntries) + M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val); +} + +static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM, + DXILResourceTypeMap &DRTM, + const ModuleShaderFlags &ShaderFlags, + const ModuleMetadataInfo &MMDI) { LLVMContext &Ctx = M.getContext(); IRBuilder<> IRB(Ctx); SmallVector EntryFnMDNodes; @@ -381,6 +423,22 @@ static void translateMetadata(Module &M, DXILResourceMap &DRM, M.getOrInsertNamedMetadata("dx.entryPoints"); for (auto *Entry : EntryFnMDNodes) EntryPointsNamedMD->addOperand(Entry); + + cleanModuleFlags(M); + + // dx.rootsignatures will have been parsed from its metadata form as its + // binary form as part of the RootSignatureAnalysisWrapper, so safely + // remove it as it is not recognized in DXIL + if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) + RootSignature->eraseFromParent(); + + // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and + // causes all tests using the DXIL Validator to fail. + // + // This is a temporary fix and should be replaced with a allowlist once + // we have determined all metadata that the DXIL Validator allows + if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa")) + ErrNo->eraseFromParent(); } PreservedAnalyses DXILTranslateMetadata::run(Module &M, @@ -390,8 +448,8 @@ PreservedAnalyses DXILTranslateMetadata::run(Module &M, const ModuleShaderFlags &ShaderFlags = MAM.getResult(M); const dxil::ModuleMetadataInfo MMDI = MAM.getResult(M); - translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI); - translateBranchMetadata(M); + translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI); + translateInstructionMetadata(M); return PreservedAnalyses::all(); } @@ -409,10 +467,13 @@ class DXILTranslateMetadataLegacy : public ModulePass { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); - AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } bool runOnModule(Module &M) override { @@ -425,8 +486,8 @@ class DXILTranslateMetadataLegacy : public ModulePass { dxil::ModuleMetadataInfo MMDI = getAnalysis().getModuleMetadata(); - translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI); - translateBranchMetadata(M); + translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI); + translateInstructionMetadata(M); return true; } }; @@ -443,6 +504,7 @@ INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, "dxil-translate-metadata", "DXIL Translate Metadata", false, false) INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass) INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper) +INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) INITIALIZE_PASS_END(DXILTranslateMetadataLegacy, "dxil-translate-metadata", "DXIL Translate Metadata", false, false) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h index f3f5eb1901406..4c1ffac1781e6 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h @@ -13,7 +13,8 @@ namespace llvm { -/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes +/// A pass that transforms LLVM Metadata in the module to it's DXIL equivalent, +/// then emits all recognized DXIL Metadata class DXILTranslateMetadata : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/test/CodeGen/DirectX/legalize-module-flags.ll b/llvm/test/CodeGen/DirectX/legalize-module-flags.ll index 6c29deabc2aa3..044bd91866e61 100644 --- a/llvm/test/CodeGen/DirectX/legalize-module-flags.ll +++ b/llvm/test/CodeGen/DirectX/legalize-module-flags.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-prepare -mtriple=dxil-unknown-shadermodel6.0-compute %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata -mtriple=dxil-unknown-shadermodel6.0-compute %s | FileCheck %s ; Make sure behavior flag > 6 is fixed. ; CHECK: !{i32 2, !"frame-pointer", i32 2} diff --git a/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll b/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll index 244ec8d54e131..b8a60a8b6e662 100644 --- a/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll +++ b/llvm/test/CodeGen/DirectX/legalize-module-flags2.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-prepare -mtriple=dxil-unknown-shadermodel6.0-library %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata -mtriple=dxil-unknown-shadermodel6.0-library %s | FileCheck %s ; CHECK: define void @main() ; Make sure behavior flag > 6 is fixed. diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 13c25396ea98f..d265826cd2469 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -40,8 +40,8 @@ ; CHECK-NEXT: DXIL Resources Analysis ; CHECK-NEXT: DXIL Module Metadata analysis ; CHECK-NEXT: DXIL Shader Flag Analysis -; CHECK-NEXT: DXIL Translate Metadata ; CHECK-NEXT: DXIL Root Signature Analysis +; CHECK-NEXT: DXIL Translate Metadata ; CHECK-NEXT: DXIL Post Optimization Validation ; CHECK-NEXT: DXIL Op Lowering ; CHECK-NEXT: DXIL Prepare Module diff --git a/llvm/test/CodeGen/DirectX/metadata-stripping.ll b/llvm/test/CodeGen/DirectX/metadata-stripping.ll index eb939babd7d62..531ab6c334d24 100644 --- a/llvm/test/CodeGen/DirectX/metadata-stripping.ll +++ b/llvm/test/CodeGen/DirectX/metadata-stripping.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --dxil-prepare %s | FileCheck %s +; RUN: opt -S --dxil-translate-metadata %s | FileCheck %s ; Test that only metadata nodes that are valid in DXIL are allowed through diff --git a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll index 9190d0305d63f..2c4140dae9e3e 100644 --- a/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll +++ b/llvm/test/CodeGen/DirectX/strip-llvm-errno-tbaa.ll @@ -1,6 +1,6 @@ -; RUN: opt -S -dxil-prepare < %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s -; Ensures that dxil-prepare will remove the llvm.errno.tbaa metadata +; Ensures that dxil-translate-metadata will remove the llvm.errno.tbaa metadata target triple = "dxil-unknown-shadermodel6.0-compute" @@ -10,7 +10,6 @@ entry: } ; CHECK-NOT: !llvm.errno.tbaa -; CHECK-NOT: {{^!}} !llvm.errno.tbaa = !{!0} diff --git a/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll b/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll index 3ac617ae871fc..daf20bf636b00 100644 --- a/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll +++ b/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll @@ -1,6 +1,6 @@ -; RUN: opt -S -dxil-prepare < %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s -; Ensures that dxil-prepare will remove the dx.rootsignatures metadata +; Ensures that dxil-translate-metadata will remove the dx.rootsignatures metadata target triple = "dxil-unknown-shadermodel6.0-compute" @@ -10,7 +10,6 @@ entry: } ; CHECK-NOT: !dx.rootsignatures -; CHECK-NOT: {{^!}} !dx.rootsignatures = !{!2} ; list of function/root signature pairs !2 = !{ ptr @main, !3, i32 2 } ; function, root signature From b6e6a4dc6d494191a9665715b0d989876778a46d Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 24 Oct 2025 21:03:08 +0000 Subject: [PATCH 02/19] [msan] Convert target("aarch64.svcount") from compile-time crash to MSan false negatives (#165028) MSan currently crashes at compile-time when it encounters target("aarch64.svcount") (e.g., https://github.com/llvm/llvm-project/pull/164315). This patch duct-tapes MSan so that it won't crash at compile-time, and instead propagates a clean shadow (resulting in false negatives but not false positives). --- .../Instrumentation/MemorySanitizer.cpp | 40 +- .../AArch64/sme-aarch64-svcount-mini.ll | 10 +- .../AArch64/sme-aarch64-svcount.ll | 83 ++- .../AArch64/sme2-intrinsics-add-mini.ll | 17 +- .../AArch64/sme2-intrinsics-add.ll | 485 +++++++++++++++++- 5 files changed, 621 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b6cbecb6133f4..10b03bbcd33dc 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4); static const Align kShadowTLSAlignment = Align(8); // These constants must be kept in sync with the ones in msan.h. +// TODO: increase size to match SVE/SVE2/SME/SME2 limits static const unsigned kParamTLSSize = 800; static const unsigned kRetvalTLSSize = 800; @@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor { } } + static bool isAArch64SVCount(Type *Ty) { + if (TargetExtType *TTy = dyn_cast(Ty)) + return TTy->getName() == "aarch64.svcount"; + return false; + } + + // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka + // 'target("aarch64.svcount")', but not e.g., . + static bool isScalableNonVectorType(Type *Ty) { + if (!isAArch64SVCount(Ty)) + LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty + << "\n"); + + return Ty->isScalableTy() && !isa(Ty); + } + void materializeChecks() { #ifndef NDEBUG // For assert below. @@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor { LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } + if (isScalableNonVectorType(OrigTy)) { + LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy + << "\n"); + return OrigTy; + } + uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor { << *OrigIns << "\n"); return; } -#ifndef NDEBUG + Type *ShadowTy = Shadow->getType(); + if (isScalableNonVectorType(ShadowTy)) { + LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow + << " before " << *OrigIns << "\n"); + return; + } +#ifndef NDEBUG assert((isa(ShadowTy) || isa(ShadowTy) || isa(ShadowTy) || isa(ShadowTy)) && "Can only insert checks for integer, vector, and aggregate shadow " @@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor { // an extra "select". This results in much more compact IR. // Sa = select Sb, poisoned, (select b, Sc, Sd) Sa1 = getPoisonedShadow(getShadowTy(I.getType())); + } else if (isScalableNonVectorType(I.getType())) { + // This is intended to handle target("aarch64.svcount"), which can't be + // handled in the else branch because of incompatibility with CreateXor + // ("The supported LLVM operations on this type are limited to load, + // store, phi, select and alloca instructions"). + + // TODO: this currently underapproximates. Use Arm SVE EOR in the else + // branch as needed instead. + Sa1 = getCleanShadow(getShadowTy(I.getType())); } else { // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ] // If Sb (condition is poisoned), look for bits in c and d that are equal diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll index 1c869bd41b931..e7491e985fa26 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll @@ -1,14 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=msan -mattr=+sme -o - %s - -; XFAIL: * +; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s ; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll -; Manually minimized to show MSan leads to a compiler crash target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android9001" define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind { +; CHECK-LABEL: @test_return_arg1( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret target("aarch64.svcount") [[ARG1:%.*]] +; ret target("aarch64.svcount") %arg1 } diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll index 00cf3204464d0..e1ea9e68aefc3 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=msan -mattr=+sme -o - %s - -; XFAIL: * +; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s ; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll @@ -12,16 +10,49 @@ target triple = "aarch64--linux-android9001" ; Test simple loads, stores and return. ; define target("aarch64.svcount") @test_load(ptr %ptr) nounwind { +; CHECK-LABEL: @test_load( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret target("aarch64.svcount") [[RES]] +; %res = load target("aarch64.svcount"), ptr %ptr ret target("aarch64.svcount") %res } define void @test_store(ptr %ptr, target("aarch64.svcount") %val) nounwind { +; CHECK-LABEL: @test_store( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr [[TMP3]], align 2 +; CHECK-NEXT: store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2 +; CHECK-NEXT: ret void +; store target("aarch64.svcount") %val, ptr %ptr ret void } define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val) nounwind { +; CHECK-LABEL: @test_alloca_store_reload( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[PTR:%.*]] = alloca target("aarch64.svcount"), align 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP5]], i8 0, i64 [[TMP2]], i1 false) +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr [[TMP8]], align 2 +; CHECK-NEXT: store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2 +; CHECK-NEXT: [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR]], align 2 +; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret target("aarch64.svcount") [[RES]] +; %ptr = alloca target("aarch64.svcount"), align 1 store target("aarch64.svcount") %val, ptr %ptr %res = load target("aarch64.svcount"), ptr %ptr @@ -33,10 +64,20 @@ define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcou ; define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind { +; CHECK-LABEL: @test_return_arg1( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret target("aarch64.svcount") [[ARG1:%.*]] +; ret target("aarch64.svcount") %arg1 } define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4) nounwind { +; CHECK-LABEL: @test_return_arg4( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret target("aarch64.svcount") [[ARG4:%.*]] +; ret target("aarch64.svcount") %arg4 } @@ -46,22 +87,58 @@ define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %ar declare void @take_svcount_1(target("aarch64.svcount") %arg) define void @test_pass_1arg(target("aarch64.svcount") %arg) nounwind { +; CHECK-LABEL: @test_pass_1arg( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @take_svcount_1(target("aarch64.svcount") [[ARG:%.*]]) +; CHECK-NEXT: ret void +; call void @take_svcount_1(target("aarch64.svcount") %arg) ret void } declare void @take_svcount_5(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4) define void @test_pass_5args(target("aarch64.svcount") %arg) nounwind { +; CHECK-LABEL: @test_pass_5args( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @take_svcount_5(target("aarch64.svcount") [[ARG:%.*]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]]) +; CHECK-NEXT: ret void +; call void @take_svcount_5(target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg) ret void } define target("aarch64.svcount") @test_sel(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i1 %cmp) sanitize_memory { +; CHECK-LABEL: @test_sel( +; CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[CMP:%.*]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[TMP1]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP2]] +; CHECK-NEXT: [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]] +; CHECK-NEXT: store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret target("aarch64.svcount") [[X_Y]] +; %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y ret target("aarch64.svcount") %x.y } define target("aarch64.svcount") @test_sel_cc(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i32 %k) sanitize_memory { +; CHECK-LABEL: @test_sel_cc( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[K:%.*]], -2147483648 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP4]], -2147483606 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], -2147483606 +; CHECK-NEXT: [[TMP8:%.*]] = xor i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[K]], 42 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[CMP]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[TMP8]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP9]] +; CHECK-NEXT: [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]] +; CHECK-NEXT: store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret target("aarch64.svcount") [[X_Y]] +; %cmp = icmp sgt i32 %k, 42 %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y ret target("aarch64.svcount") %x.y diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll index 3f43efa233621..3ae73c5719c3a 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s - -; XFAIL: * +; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s ; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll ; Manually reduced to show MSan leads to a compiler crash @@ -10,6 +8,19 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android9001" define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]]) +; CHECK-NEXT: ret void +; %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() %2 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr) ret void diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll index cd04373c11d20..8d00b930abf95 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s - -; XFAIL: * +; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s ; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll @@ -9,6 +7,27 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android9001" define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZM]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice, %zn0, %zn1, %zm) @@ -20,6 +39,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i64( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZM]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice, %zn0, %zn1, %zm) @@ -32,6 +72,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x4_i32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]], [[ZM]]) +; CHECK-NEXT: ret void +; %zn2, %zn3, %zm) sanitize_memory { call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice, @@ -47,6 +108,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]], [[ZM]]) +; CHECK-NEXT: ret void +; %zn0, %zn1, %zn2, %zn3, %zm) sanitize_memory { @@ -64,6 +146,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice, define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZM1:%.*]], [[ZM2:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZM1]], [[ZM2]]) +; CHECK-NEXT: ret void +; %zm1, %zm2) sanitize_memory { call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice, %zn0, %zn1, @@ -77,6 +180,27 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i64( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZM1:%.*]], [[ZM2:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZM1]], [[ZM2]]) +; CHECK-NEXT: ret void +; %zm1, %zm2) sanitize_memory { call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice, %zn0, %zn1, @@ -91,6 +215,27 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]], [[ZM0:%.*]], [[ZM1:%.*]], [[ZM2:%.*]], [[ZM3:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]], [[ZM0]], [[ZM1]], [[ZM2]], [[ZM3]]) +; CHECK-NEXT: ret void +; %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) sanitize_memory { @@ -109,6 +254,27 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, } define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i64( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]], [[ZM0:%.*]], [[ZM1:%.*]], [[ZM2:%.*]], [[ZM3:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]], [[ZM0]], [[ZM1]], [[ZM2]], [[ZM3]]) +; CHECK-NEXT: ret void +; %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) sanitize_memory { @@ -127,6 +293,27 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, } define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, %zn1) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x2_i32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice, %zn0, %zn1) %slice.7 = add i32 %slice, 7 call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, %zn0, %zn1) @@ -134,6 +321,27 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, } define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, %zn1) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x2_i64( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, %zn0, %zn1) %slice.7 = add i32 %slice, 7 call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, %zn0, %zn1) @@ -141,6 +349,27 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, } define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0, %zn1) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x2_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice, %zn0, %zn1) %slice.7 = add i32 %slice, 7 @@ -150,6 +379,27 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0 } define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn0, %zn1) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice, %zn0, %zn1) %slice.7 = add i32 %slice, 7 @@ -159,6 +409,36 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn } define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64_tuple( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP5]], 1 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]] +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]]) +; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , } [[TMP10]], 1 +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, [[TMP6]], [[TMP11]]) +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, [[TMP7]], [[TMP12]]) +; CHECK-NEXT: ret void +; entry: %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() %1 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr) @@ -175,6 +455,27 @@ entry: define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, %zn1, %zn2, %zn3) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x4_i32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice, %zn0, %zn1, %zn2, %zn3) @@ -186,6 +487,27 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, } define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, %zn1, %zn2, %zn3) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x4_i64( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice, %zn0, %zn1, %zn2, %zn3) @@ -197,6 +519,27 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, } define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0, %zn1, %zn2, %zn3) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice, %zn0, %zn1, %zn2, %zn3) @@ -208,6 +551,73 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0 } define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP5]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP5]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP5]], 3 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]] +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]]) +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP12]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP12]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , , , } [[TMP12]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], 0 +; CHECK-NEXT: [[MUL3:%.*]] = shl i64 [[STRIDE]], 1 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[TMP0]], [[TMP18]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL3]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[TMP21:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX4]]) +; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { , , , } [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { , , , } [[TMP21]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { , , , } [[TMP21]], 2 +; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { , , , } [[TMP21]], 3 +; CHECK-NEXT: [[MSPROP_MUL_CST:%.*]] = mul i64 [[TMP1]], 1 +; CHECK-NEXT: [[MUL5:%.*]] = mul i64 [[STRIDE]], 3 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[TMP0]], [[MSPROP_MUL_CST]] +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL5]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[_MSPROP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP5]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]] +; CHECK: 26: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 27: +; CHECK-NEXT: [[TMP28:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX6]]) +; CHECK-NEXT: [[TMP29:%.*]] = extractvalue { , , , } [[TMP28]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = extractvalue { , , , } [[TMP28]], 1 +; CHECK-NEXT: [[TMP31:%.*]] = extractvalue { , , , } [[TMP28]], 2 +; CHECK-NEXT: [[TMP32:%.*]] = extractvalue { , , , } [[TMP28]], 3 +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, [[TMP6]], [[TMP13]], [[TMP22]], [[TMP29]]) +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, [[TMP7]], [[TMP14]], [[TMP23]], [[TMP30]]) +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, [[TMP8]], [[TMP15]], [[TMP24]], [[TMP31]]) +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, [[TMP9]], [[TMP16]], [[TMP25]], [[TMP32]]) +; CHECK-NEXT: ret void +; entry: %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr) @@ -243,6 +653,27 @@ entry: } define void @multi_vector_add_za_vg1x4_f64(i32 %slice, %zn0, %zn1, %zn2, %zn3) sanitize_memory { +; CHECK-LABEL: @multi_vector_add_za_vg1x4_f64( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE:%.*]], [[ZN0:%.*]], [[ZN1:%.*]], [[ZN2:%.*]], [[ZN3:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], 0 +; CHECK-NEXT: [[SLICE_7:%.*]] = add i32 [[SLICE]], 7 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_7]], [[ZN0]], [[ZN1]], [[ZN2]], [[ZN3]]) +; CHECK-NEXT: ret void +; call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice, %zn0, %zn1, %zn2, %zn3) @@ -255,6 +686,12 @@ define void @multi_vector_add_za_vg1x4_f64(i32 %slice, %zn define { , } @multi_vec_add_single_x2_s8( %unused, %zdn1, %zdn2, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_single_x2_s8( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , } [[RES]] +; %res = call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( %zdn1, %zdn2, %zm) @@ -262,6 +699,12 @@ define { , } @multi_vec_add_single_x2_s8(, } @multi_vec_add_single_x2_s16( %unused, %zdn1, %zdn2, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_single_x2_s16( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , } [[RES]] +; %res = call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( %zdn1, %zdn2, %zm) @@ -269,6 +712,12 @@ define { , } @multi_vec_add_single_x2_s16(< } define { , } @multi_vec_add_single_x2_s32( %unused, %zdn1, %zdn2, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_single_x2_s32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , } [[RES]] +; %res = call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( %zdn1, %zdn2, %zm) @@ -276,6 +725,12 @@ define { , } @multi_vec_add_single_x2_s32(< } define { , } @multi_vec_add_single_x2_s64( %unused, %zdn1, %zdn2, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_single_x2_s64( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , } [[RES]] +; %res = call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( %zdn1, %zdn2, %zm) @@ -284,6 +739,12 @@ define { , } @multi_vec_add_single_x2_s64(< define { , , , } @multi_vec_add_single_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_single_x4_s8( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZDN3:%.*]], [[ZDN4:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , , , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , , , } [[RES]] +; %res = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, @@ -292,6 +753,12 @@ define { , , , , , , } @multi_vec_add_x4_single_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_x4_single_s16( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZDN3:%.*]], [[ZDN4:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , , , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , , , } [[RES]] +; %res = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -300,6 +767,12 @@ define { , , , , , , } @multi_vec_add_x4_single_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_x4_single_s32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZDN3:%.*]], [[ZDN4:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , , , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , , , } [[RES]] +; %res = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -308,6 +781,12 @@ define { , , , , , , } @multi_vec_add_x4_single_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) sanitize_memory { +; CHECK-LABEL: @multi_vec_add_x4_single_s64( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZDN1:%.*]], [[ZDN2:%.*]], [[ZDN3:%.*]], [[ZDN4:%.*]], [[ZM:%.*]]) +; CHECK-NEXT: store { , , , } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { , , , } [[RES]] +; %res = call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, From e07aef9dde4cc84af8b696b97c294b6497ce667a Mon Sep 17 00:00:00 2001 From: Bruno De Fraine Date: Fri, 24 Oct 2025 23:33:25 +0200 Subject: [PATCH 03/19] [clang][Sema] close IsStandardConversion hole when adding cfi_unchecked_callee (#164592) Commit b194cf1e401a changed this function for the case where attribute `cfi_unchecked_callee` is added in a function conversion. But this introduces a hole (issue #162798), and it seems the change was unnecessary: the preceding `TryFunctionConversion` will already allow adding the `cfi_unchecked_callee` attribute, and will update `FromType` if it succeeds. So we revert the changes to `IsStandardConversion`. We also remove the helper function `AddingCFIUncheckedCallee` which is no longer needed, and simplify the corresponding `DiscardingCFIUncheckedCallee`. Fixes: #162798 --- clang/include/clang/Sema/Sema.h | 5 ---- clang/lib/Sema/SemaChecking.cpp | 30 ++++--------------- clang/lib/Sema/SemaOverload.cpp | 11 +++---- .../cfi-unchecked-callee-attribute.cpp | 1 + 4 files changed, 10 insertions(+), 37 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 189798f71dbad..52904c72d1cfc 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2681,11 +2681,6 @@ class Sema final : public SemaBase { /// function without this attribute. bool DiscardingCFIUncheckedCallee(QualType From, QualType To) const; - /// Returns true if `From` is a function or pointer to a function without the - /// `cfi_unchecked_callee` attribute but `To` is a function or pointer to - /// function with this attribute. - bool AddingCFIUncheckedCallee(QualType From, QualType To) const; - /// This function calls Action when it determines that E designates a /// misaligned member due to the packed attribute. This is used to emit /// local diagnostics like in reference binding. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f99c01eaad9a5..f4517877b04c8 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -12373,14 +12373,9 @@ static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source, } } -enum CFIUncheckedCalleeChange { - None, - Adding, - Discarding, -}; - -static CFIUncheckedCalleeChange AdjustingCFIUncheckedCallee(QualType From, - QualType To) { +bool Sema::DiscardingCFIUncheckedCallee(QualType From, QualType To) const { + From = Context.getCanonicalType(From); + To = Context.getCanonicalType(To); QualType MaybePointee = From->getPointeeType(); if (!MaybePointee.isNull() && MaybePointee->getAs()) From = MaybePointee; @@ -12392,25 +12387,10 @@ static CFIUncheckedCalleeChange AdjustingCFIUncheckedCallee(QualType From, if (const auto *ToFn = To->getAs()) { if (FromFn->getCFIUncheckedCalleeAttr() && !ToFn->getCFIUncheckedCalleeAttr()) - return Discarding; - if (!FromFn->getCFIUncheckedCalleeAttr() && - ToFn->getCFIUncheckedCalleeAttr()) - return Adding; + return true; } } - return None; -} - -bool Sema::DiscardingCFIUncheckedCallee(QualType From, QualType To) const { - From = Context.getCanonicalType(From); - To = Context.getCanonicalType(To); - return ::AdjustingCFIUncheckedCallee(From, To) == Discarding; -} - -bool Sema::AddingCFIUncheckedCallee(QualType From, QualType To) const { - From = Context.getCanonicalType(From); - To = Context.getCanonicalType(To); - return ::AdjustingCFIUncheckedCallee(From, To) == Adding; + return false; } void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC, diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 1f2511197aa68..37f351174e3d0 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -2532,15 +2532,12 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, SCS.setToType(2, FromType); - // If we have not converted the argument type to the parameter type, - // this is a bad conversion sequence, unless we're resolving an overload in C. - // - // Permit conversions from a function without `cfi_unchecked_callee` to a - // function with `cfi_unchecked_callee`. - if (CanonFrom == CanonTo || S.AddingCFIUncheckedCallee(CanonFrom, CanonTo)) + if (CanonFrom == CanonTo) return true; - if ((S.getLangOpts().CPlusPlus || !InOverloadResolution)) + // If we have not converted the argument type to the parameter type, + // this is a bad conversion sequence, unless we're resolving an overload in C. + if (S.getLangOpts().CPlusPlus || !InOverloadResolution) return false; ExprResult ER = ExprResult{From}; diff --git a/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp b/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp index 072f217ff7b19..a5a17dd5a4d82 100644 --- a/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp +++ b/clang/test/Frontend/cfi-unchecked-callee-attribute.cpp @@ -9,6 +9,7 @@ void (*checked_ptr)(void) = unchecked; // expected-warning{{implicit conversion void (CFI_UNCHECKED_CALLEE *unchecked_ptr)(void) = unchecked; void (CFI_UNCHECKED_CALLEE *from_normal)(void) = checked; void (CFI_UNCHECKED_CALLEE *c_no_function_decay)(void) = &unchecked; +void (CFI_UNCHECKED_CALLEE __attribute__((noreturn)) *other_conflict)(void) = &checked; // expected-error{{cannot initialize a variable of type 'void (*)() __attribute__((noreturn)) __attribute__((cfi_unchecked_callee))' with an rvalue of type 'void (*)()'}} void (CFI_UNCHECKED_CALLEE *arr[10])(void); void (*cfi_elem)(void) = arr[1]; // expected-warning{{implicit conversion from 'void (*)() __attribute__((cfi_unchecked_callee))' to 'void (*)()' discards 'cfi_unchecked_callee' attribute}} void (CFI_UNCHECKED_CALLEE *cfi_unchecked_elem)(void) = arr[1]; From bd27abcceedfc60f4598124aa022cd0b766da3d8 Mon Sep 17 00:00:00 2001 From: PiJoules <6019989+PiJoules@users.noreply.github.com> Date: Fri, 24 Oct 2025 14:36:23 -0700 Subject: [PATCH 04/19] =?UTF-8?q?Revert=20"Reapply=20"[clang-format]=20Ann?= =?UTF-8?q?otate=20::operator=20and=20Foo::operator=E2=80=A6=20(#165038)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … correctly" (#164670)" This reverts commit 50ca1f407801cd268a1c130b9576dfb51fe7f392. Reverting because this leads to the bug on ToT described in https://github.com/llvm/llvm-project/issues/164866. The original fix addresses an old regression which we'd still like to land eventually. See the discussion in https://github.com/llvm/llvm-project/pull/164670 for more context. --- clang/lib/Format/TokenAnnotator.cpp | 10 +++++++--- clang/unittests/Format/TokenAnnotatorTest.cpp | 5 ----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 1d0dfd0b9c151..a8a9c51952fbd 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3791,12 +3791,18 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, if (Current.is(TT_FunctionDeclarationName)) return true; - if (Current.isNoneOf(tok::identifier, tok::kw_operator)) + if (!Current.Tok.getIdentifierInfo()) return false; const auto *Prev = Current.getPreviousNonComment(); assert(Prev); + if (Prev->is(tok::coloncolon)) + Prev = Prev->Previous; + + if (!Prev) + return false; + const auto &Previous = *Prev; if (const auto *PrevPrev = Previous.getPreviousNonComment(); @@ -3845,8 +3851,6 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, // Find parentheses of parameter list. if (Current.is(tok::kw_operator)) { - if (Line.startsWith(tok::kw_friend)) - return true; if (Previous.Tok.getIdentifierInfo() && Previous.isNoneOf(tok::kw_return, tok::kw_co_return)) { return true; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index ca99940890984..f3637383a0a65 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1129,11 +1129,6 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) { ASSERT_EQ(Tokens.size(), 7u) << Tokens; // Not TT_FunctionDeclarationName. EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_Unknown); - - Tokens = annotate("SomeAPI::operator()();"); - ASSERT_EQ(Tokens.size(), 9u) << Tokens; - // Not TT_FunctionDeclarationName. - EXPECT_TOKEN(Tokens[2], tok::kw_operator, TT_Unknown); } TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) { From 8c29bce1e9f03a22b42d11604e7555e16306f2aa Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 24 Oct 2025 22:38:23 +0100 Subject: [PATCH 05/19] [VPlan] Remove SCEVToExpansion mapping (NFC). (#164490) VPlan::SCEVToExpansion isn't needed any longer, as SCEV expansion de-duplication is handled locally in expandSCEVs. PR: https://github.com/llvm/llvm-project/pull/164490 --- llvm/lib/Transforms/Vectorize/VPlan.h | 14 -------------- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 3 --- 2 files changed, 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a1ad2dbcba8be..2591df8943752 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4172,11 +4172,6 @@ class VPlan { /// definitions are VPValues that hold a pointer to their underlying IR. SmallVector VPLiveIns; - /// Mapping from SCEVs to the VPValues representing their expansions. - /// NOTE: This mapping is temporary and will be removed once all users have - /// been modeled in VPlan directly. - DenseMap SCEVToExpansion; - /// Blocks allocated and owned by the VPlan. They will be deleted once the /// VPlan is destroyed. SmallVector CreatedBlocks; @@ -4424,15 +4419,6 @@ class VPlan { LLVM_DUMP_METHOD void dump() const; #endif - VPValue *getSCEVExpansion(const SCEV *S) const { - return SCEVToExpansion.lookup(S); - } - - void addSCEVExpansion(const SCEV *S, VPValue *V) { - assert(!SCEVToExpansion.contains(S) && "SCEV already expanded"); - SCEVToExpansion[S] = V; - } - /// Clone the current VPlan, update all VPValues of the new VPlan and cloned /// recipes to refer to the clones, and return it. VPlan *duplicate(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 06c3d75e5708c..fe66f133d7c03 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) { } VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { - if (auto *Expanded = Plan.getSCEVExpansion(Expr)) - return Expanded; VPValue *Expanded = nullptr; if (auto *E = dyn_cast(Expr)) Expanded = Plan.getOrAddLiveIn(E->getValue()); @@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); } } - Plan.addSCEVExpansion(Expr, Expanded); return Expanded; } From 825eefe856cb957adf33924a9232d3f7e947e7f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 24 Oct 2025 12:24:31 -1000 Subject: [PATCH 06/19] [flang][cuda] Accept scalar expression for bytes in kernel call (#165040) --- flang/include/flang/Parser/parse-tree.h | 4 ++-- flang/lib/Parser/program-parsers.cpp | 2 +- flang/test/Lower/CUDA/cuda-kernel-calls.cuf | 5 +++++ flang/test/Parser/cuf-sanity-common | 1 + flang/test/Parser/cuf-sanity-tree.CUF | 21 +++++++++++++++++++-- 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index be64ef3770c60..bb47f31060885 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3274,13 +3274,13 @@ struct FunctionReference { // R1521 call-stmt -> CALL procedure-designator [ chevrons ] // [( [actual-arg-spec-list] )] // (CUDA) chevrons -> <<< * | scalar-expr, scalar-expr [, -// scalar-int-expr [, scalar-int-expr ] ] >>> +// scalar-expr [, scalar-int-expr ] ] >>> struct CallStmt { BOILERPLATE(CallStmt); WRAPPER_CLASS(StarOrExpr, std::optional); struct Chevrons { TUPLE_CLASS_BOILERPLATE(Chevrons); - std::tuple, + std::tuple, std::optional> t; }; diff --git a/flang/lib/Parser/program-parsers.cpp b/flang/lib/Parser/program-parsers.cpp index 92c0a64b39a9d..740dbbfab9db7 100644 --- a/flang/lib/Parser/program-parsers.cpp +++ b/flang/lib/Parser/program-parsers.cpp @@ -484,7 +484,7 @@ constexpr auto starOrExpr{ applyFunction(presentOptional, scalarExpr))}; TYPE_PARSER(extension( "<<<" >> construct(starOrExpr, ", " >> scalarExpr, - maybe("," >> scalarIntExpr), maybe("," >> scalarIntExpr)) / + maybe("," >> scalarExpr), maybe("," >> scalarIntExpr)) / ">>>")) constexpr auto actualArgSpecList{optionalList(actualArgSpec)}; TYPE_CONTEXT_PARSER("CALL statement"_en_US, diff --git a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf index 71e594e4742ec..e0941f74072ba 100644 --- a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf +++ b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf @@ -16,6 +16,7 @@ contains subroutine host() real, device :: a integer(8) :: stream + integer(4) :: nbytes ! CHECK-LABEL: func.func @_QMtest_callPhost() ! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -57,6 +58,10 @@ contains call dev_kernel1<<<*,32,0,stream>>>(a) ! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%c-1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}, %c0{{.*}}, %{{.*}} : !fir.ref>>>(%{{.*}}) : (!fir.ref) + call dev_kernel1<<<*, 32, 0.8 * nbytes>>>(a) +! CHECK: %[[MUL:.*]] = arith.mulf %{{.*}}, %{{.*}} fastmath : f32 +! CHECK: %[[BYTES:.*]] = fir.convert %[[MUL]] : (f32) -> i32 +! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[BYTES]]>>>(%{{.*}}) : (!fir.ref) end end diff --git a/flang/test/Parser/cuf-sanity-common b/flang/test/Parser/cuf-sanity-common index 816e03bed7220..2348c2edf3b73 100644 --- a/flang/test/Parser/cuf-sanity-common +++ b/flang/test/Parser/cuf-sanity-common @@ -43,6 +43,7 @@ module m call globalsub<<<1, 2>>> call globalsub<<<1, 2, 3>>> call globalsub<<<1, 2, 3, 4>>> + call globalsub<<<1, 2, 0.9*10, 4>>> call globalsub<<<*,5>>> allocate(pa(32), pinned = isPinned) end subroutine diff --git a/flang/test/Parser/cuf-sanity-tree.CUF b/flang/test/Parser/cuf-sanity-tree.CUF index 83d7540b8dec5..b4d53f27cf395 100644 --- a/flang/test/Parser/cuf-sanity-tree.CUF +++ b/flang/test/Parser/cuf-sanity-tree.CUF @@ -178,7 +178,7 @@ include "cuf-sanity-common" !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' !CHECK: | | | | | | Scalar -> Expr = '2_4' !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2' -!CHECK: | | | | | | Scalar -> Integer -> Expr = '3_4' +!CHECK: | | | | | | Scalar -> Expr = '3_4' !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '3' !CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL globalsub<<<1_4,2_4,3_4,4_4>>>()' !CHECK: | | | | | Call @@ -188,10 +188,27 @@ include "cuf-sanity-common" !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' !CHECK: | | | | | | Scalar -> Expr = '2_4' !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2' -!CHECK: | | | | | | Scalar -> Integer -> Expr = '3_4' +!CHECK: | | | | | | Scalar -> Expr = '3_4' !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '3' !CHECK: | | | | | | Scalar -> Integer -> Expr = '4_4' !CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '4' +!CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL globalsub<<<1_4,2_4,9._4,4_4>>>()' +!CHECK: | | | | | Call +!CHECK: | | | | | | ProcedureDesignator -> Name = 'globalsub' +!CHECK: | | | | | Chevrons +!CHECK: | | | | | | StarOrExpr -> Scalar -> Expr = '1_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' +!CHECK: | | | | | | Scalar -> Expr = '2_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '2' +!CHECK: | | | | | | Scalar -> Expr = '9._4' +!CHECK: | | | | | | | Multiply +!CHECK: | | | | | | | | Expr = '8.9999997615814208984375e-1_4' +!CHECK: | | | | | | | | | LiteralConstant -> RealLiteralConstant +!CHECK: | | | | | | | | | | Real = '0.9' +!CHECK: | | | | | | | | Expr = '10_4' +!CHECK: | | | | | | | | | LiteralConstant -> IntLiteralConstant = '10' +!CHECK: | | | | | | Scalar -> Integer -> Expr = '4_4' +!CHECK: | | | | | | | LiteralConstant -> IntLiteralConstant = '4' !CHECK: | | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AllocateStmt !CHECK: | | | | | Allocation !CHECK: | | | | | | AllocateObject = 'pa' From fdcbf74a7da4fb074d5c408eb2ec4ed75fb74bf4 Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Sat, 25 Oct 2025 01:24:55 +0300 Subject: [PATCH 07/19] [Github][CI] Add default gha user for tooling containers (#164294) This would solve https://github.com/llvm/llvm-project/blob/c0073a9170aaa4f3504f7cdf20758176bcb14ac1/.github/workflows/pr-code-format.yml#L28-L34 --- .../containers/github-action-ci-tooling/Dockerfile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/containers/github-action-ci-tooling/Dockerfile b/.github/workflows/containers/github-action-ci-tooling/Dockerfile index 9d2aaf6bbd48a..8aaa2e88f2bab 100644 --- a/.github/workflows/containers/github-action-ci-tooling/Dockerfile +++ b/.github/workflows/containers/github-action-ci-tooling/Dockerfile @@ -37,6 +37,14 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Create a new user with id 1001 as that is the user id that +# Github Actions uses to perform the checkout action. +RUN useradd gha -u 1001 -m -s /bin/bash +RUN adduser gha sudo +RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers +# Don't set USER gha right away because we still need to install packages +# as root in 'ci-container-code-format' and 'ci-container-code-lint' containers + FROM base AS ci-container-code-format ARG LLVM_VERSION @@ -51,6 +59,8 @@ ENV PATH=${LLVM_SYSROOT}/bin:${PATH} COPY llvm/utils/git/requirements_formatting.txt requirements_formatting.txt RUN pip install -r requirements_formatting.txt --break-system-packages && \ rm requirements_formatting.txt +USER gha +WORKDIR /home/gha FROM base AS ci-container-code-lint @@ -80,3 +90,5 @@ RUN apt-get update && \ COPY llvm/utils/git/requirements_linting.txt requirements_linting.txt RUN pip install -r requirements_linting.txt --break-system-packages && \ rm requirements_linting.txt +USER gha +WORKDIR /home/gha From 9b80fc39606f6f02b88a21ac29e98a74b0b7426a Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Fri, 24 Oct 2025 15:29:04 -0700 Subject: [PATCH 08/19] [lldb] Add missing function call in test (NFC) --- lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py b/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py index 8a321b2ff6324..0f40dfd09c958 100644 --- a/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py +++ b/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py @@ -40,7 +40,7 @@ def run_arch(self, exe, arch): launch_info = target.GetLaunchInfo() error = lldb.SBError() process = target.Launch(launch_info, error) - self.assertTrue(error.Success, str(error)) + self.assertTrue(error.Success(), str(error)) self.assertState(process.GetState(), lldb.eStateExited) self.assertIn("slice: {}".format(arch), process.GetSTDOUT(1000)) From 4c52c454c0f266a5948b5ba48c597571d1a0040a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Oct 2025 15:30:27 -0700 Subject: [PATCH 09/19] [ADT] Rename variable names in IndexedMap (NFC) (#164925) This patch renames variable names to conform to the LLVM Coding Standards. The public interface remains the same. --- llvm/include/llvm/ADT/IndexedMap.h | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/ADT/IndexedMap.h b/llvm/include/llvm/ADT/IndexedMap.h index 55935a7afdab4..02193c79a6f0c 100644 --- a/llvm/include/llvm/ADT/IndexedMap.h +++ b/llvm/include/llvm/ADT/IndexedMap.h @@ -43,40 +43,40 @@ class IndexedMap { // is trivially copyable. using StorageT = SmallVector; - StorageT storage_; - T nullVal_ = T(); - ToIndexT toIndex_; + StorageT Storage; + T NullVal = T(); + ToIndexT ToIndex; public: IndexedMap() = default; - explicit IndexedMap(const T &val) : nullVal_(val) {} + explicit IndexedMap(const T &Val) : NullVal(Val) {} - typename StorageT::reference operator[](IndexT n) { - assert(toIndex_(n) < storage_.size() && "index out of bounds!"); - return storage_[toIndex_(n)]; + typename StorageT::reference operator[](IndexT N) { + assert(ToIndex(N) < Storage.size() && "index out of bounds!"); + return Storage[ToIndex(N)]; } - typename StorageT::const_reference operator[](IndexT n) const { - assert(toIndex_(n) < storage_.size() && "index out of bounds!"); - return storage_[toIndex_(n)]; + typename StorageT::const_reference operator[](IndexT N) const { + assert(ToIndex(N) < Storage.size() && "index out of bounds!"); + return Storage[ToIndex(N)]; } - void reserve(typename StorageT::size_type s) { storage_.reserve(s); } + void reserve(typename StorageT::size_type S) { Storage.reserve(S); } - void resize(typename StorageT::size_type s) { storage_.resize(s, nullVal_); } + void resize(typename StorageT::size_type S) { Storage.resize(S, NullVal); } - void clear() { storage_.clear(); } + void clear() { Storage.clear(); } - void grow(IndexT n) { - unsigned NewSize = toIndex_(n) + 1; - if (NewSize > storage_.size()) + void grow(IndexT N) { + unsigned NewSize = ToIndex(N) + 1; + if (NewSize > Storage.size()) resize(NewSize); } - bool inBounds(IndexT n) const { return toIndex_(n) < storage_.size(); } + bool inBounds(IndexT N) const { return ToIndex(N) < Storage.size(); } - typename StorageT::size_type size() const { return storage_.size(); } + typename StorageT::size_type size() const { return Storage.size(); } }; } // namespace llvm From 30e77152961b2c560127cc8391ca79f002497a09 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Oct 2025 15:30:35 -0700 Subject: [PATCH 10/19] [ADT] Consolidate SparseSetValFunctor implementations (NFC) (#164926) This patch consolidates the two implementations of SparseSetValFunctor with "if constexpr". std::is_same_v is more readable than "KeyT, KeyT" in the template parameter list. --- llvm/include/llvm/ADT/SparseSet.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h index 4697de097e7eb..2ac23cc4cfa13 100644 --- a/llvm/include/llvm/ADT/SparseSet.h +++ b/llvm/include/llvm/ADT/SparseSet.h @@ -59,24 +59,20 @@ template struct SparseSetValTraits { } }; -/// SparseSetValFunctor - Helper class for selecting SparseSetValTraits. The -/// generic implementation handles ValueT classes which either provide -/// getSparseSetIndex() or specialize SparseSetValTraits<>. +/// SparseSetValFunctor - Helper class for getting a value's index. /// +/// In the generic case, this is done via SparseSetValTraits. When the value +/// type is the same as the key type, the KeyFunctor is used directly. template struct SparseSetValFunctor { unsigned operator()(const ValueT &Val) const { - return SparseSetValTraits::getValIndex(Val); + if constexpr (std::is_same_v) + return KeyFunctorT()(Val); + else + return SparseSetValTraits::getValIndex(Val); } }; -/// SparseSetValFunctor - Helper class for the common case of -/// identity key/value sets. -template -struct SparseSetValFunctor { - unsigned operator()(const KeyT &Key) const { return KeyFunctorT()(Key); } -}; - /// SparseSet - Fast set implementation for objects that can be identified by /// small unsigned keys. /// From 8388a5b3403a4f711890a397ec577a11bb9d5fc3 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Oct 2025 15:30:42 -0700 Subject: [PATCH 11/19] [ADT] Rename identity_cxx20 to identity (#164927) Now that the old llvm::identity has moved into IndexedMap.h under a different name, this patch renames identity_cxx20 to identity. Note that llvm::identity closely models std::identity from C++20. --- llvm/include/llvm/ADT/STLForwardCompat.h | 2 +- llvm/include/llvm/ADT/SparseMultiSet.h | 2 +- llvm/include/llvm/ADT/SparseSet.h | 2 +- llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 2 +- llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h | 2 +- llvm/lib/CodeGen/RegAllocFast.cpp | 2 +- llvm/unittests/ADT/STLForwardCompatTest.cpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h index 9c81981534506..e02694f043fbb 100644 --- a/llvm/include/llvm/ADT/STLForwardCompat.h +++ b/llvm/include/llvm/ADT/STLForwardCompat.h @@ -125,7 +125,7 @@ struct detector>, Op, Args...> { template