diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index 86da323892f98..15edb7e77a22b 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -250,15 +250,20 @@ class SemaHLSL : public SemaBase {
                             const RecordType *RT);

   void checkSemanticAnnotation(FunctionDecl *EntryPoint, const Decl *Param,
-                               const HLSLAppliedSemanticAttr *SemanticAttr);
+                               const HLSLAppliedSemanticAttr *SemanticAttr,
+                               bool IsInput);
+
   bool determineActiveSemanticOnScalar(FunctionDecl *FD,
                                        DeclaratorDecl *OutputDecl,
                                        DeclaratorDecl *D,
                                        SemanticInfo &ActiveSemantic,
-                                       llvm::StringSet<> &ActiveInputSemantics);
+                                       llvm::StringSet<> &ActiveSemantics,
+                                       bool IsInput);
+
   bool determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *OutputDecl,
                                DeclaratorDecl *D, SemanticInfo &ActiveSemantic,
-                               llvm::StringSet<> &ActiveInputSemantics);
+                               llvm::StringSet<> &ActiveSemantics,
+                               bool IsInput);

   void processExplicitBindingsOnDecl(VarDecl *D);
diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
index 551027bb1c8eb..40888e7326659 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
@@ -19,18 +19,64 @@ using namespace clang::CIRGen;
 namespace {
 struct OpenACCDeclareCleanup final : EHScopeStack::Cleanup {
+  SourceRange declareRange;
   mlir::acc::DeclareEnterOp enterOp;

-  OpenACCDeclareCleanup(mlir::acc::DeclareEnterOp enterOp) : enterOp(enterOp) {}
+  OpenACCDeclareCleanup(SourceRange declareRange,
+                        mlir::acc::DeclareEnterOp enterOp)
+      : declareRange(declareRange), enterOp(enterOp) {}
+
+  template <typename OutTy, typename InTy>
+  void createOutOp(CIRGenFunction &cgf, InTy inOp) {
+    if constexpr (std::is_same_v<OutTy, mlir::acc::DeleteOp>) {
+      auto outOp =
+          OutTy::create(cgf.getBuilder(), inOp.getLoc(), inOp,
+                        inOp.getStructured(), inOp.getImplicit(),
+                        llvm::Twine(inOp.getNameAttr()), inOp.getBounds());
+      outOp.setDataClause(inOp.getDataClause());
+      outOp.setModifiers(inOp.getModifiers());
+    } else {
+      auto outOp =
+          OutTy::create(cgf.getBuilder(), inOp.getLoc(), inOp, inOp.getVarPtr(),
+                        inOp.getStructured(), inOp.getImplicit(),
+                        llvm::Twine(inOp.getNameAttr()), inOp.getBounds());
+      outOp.setDataClause(inOp.getDataClause());
+      outOp.setModifiers(inOp.getModifiers());
+    }
+  }

   void emit(CIRGenFunction &cgf) override {
-    mlir::acc::DeclareExitOp::create(cgf.getBuilder(), enterOp.getLoc(),
-                                     enterOp, {});
+    auto exitOp = mlir::acc::DeclareExitOp::create(
+        cgf.getBuilder(), enterOp.getLoc(), enterOp, {});

-    // TODO(OpenACC): Some clauses require that we add info about them to the
-    // DeclareExitOp. However, we don't have any of those implemented yet, so
-    // we should add infrastructure here to do that once we have one
-    // implemented.
+    // Some data clauses need to be referenced in 'exit', AND need to have an
+    // operation after the exit. Copy these from the enter operation.
+    for (mlir::Value val : enterOp.getDataClauseOperands()) {
+      if (auto copyin = val.getDefiningOp<mlir::acc::CopyinOp>()) {
+        switch (copyin.getDataClause()) {
+        default:
+          cgf.cgm.errorNYI(declareRange,
+                           "OpenACC local declare clause copyin cleanup");
+          break;
+        case mlir::acc::DataClause::acc_copy:
+          createOutOp<mlir::acc::CopyoutOp>(cgf, copyin);
+          break;
+        case mlir::acc::DataClause::acc_copyin:
+          createOutOp<mlir::acc::DeleteOp>(cgf, copyin);
+          break;
+        }
+      } else if (val.getDefiningOp<mlir::acc::DeclareLinkOp>()) {
+        // Link has no exit clauses, and shouldn't be copied.
+        continue;
+      } else if (val.getDefiningOp<mlir::acc::DevicePtrOp>()) {
+        // DevicePtr has no exit clauses, and shouldn't be copied.
+        continue;
+      } else {
+        cgf.cgm.errorNYI(declareRange, "OpenACC local declare clause cleanup");
+        continue;
+      }
+      exitOp.getDataClauseOperandsMutable().append(val);
+    }
   }
 };
 } // namespace
@@ -45,7 +91,7 @@ void CIRGenFunction::emitOpenACCDeclare(const OpenACCDeclareDecl &d) {
                                            d.clauses());

   ehStack.pushCleanup<OpenACCDeclareCleanup>(CleanupKind::NormalCleanup,
-                                             enterOp);
+                                             d.getSourceRange(), enterOp);
 }

 void CIRGenFunction::emitOpenACCRoutine(const OpenACCRoutineDecl &d) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index c5c6bcd0153a4..1e7a332d1dc22 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -800,12 +800,16 @@ class OpenACCClauseCIREmitter final
             var, mlir::acc::DataClause::acc_copy, clause.getModifierList(),
             /*structured=*/true,
             /*implicit=*/false);
+    } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) {
+      for (const Expr *var : clause.getVarList())
+        addDataOperand<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
+            var, mlir::acc::DataClause::acc_copy, clause.getModifierList(),
+            /*structured=*/true,
+            /*implicit=*/false);
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
     } else {
-      // TODO: When we've implemented this for everything, switch this to an
-      // unreachable. declare construct remains.
-      return clauseNotImplemented(clause);
+      llvm_unreachable("Unknown construct kind in VisitCopyClause");
     }
   }

@@ -822,12 +826,16 @@ class OpenACCClauseCIREmitter final
         addDataOperand<mlir::acc::CopyinOp, mlir::acc::DeleteOp>(
             var, mlir::acc::DataClause::acc_copyin, clause.getModifierList(),
             /*structured=*/false, /*implicit=*/false);
+    } else if constexpr (isOneOfTypes<OpTy, mlir::acc::DeclareEnterOp>) {
+      for (const Expr *var : clause.getVarList())
+        addDataOperand<mlir::acc::CopyinOp, mlir::acc::DeleteOp>(
+            var, mlir::acc::DataClause::acc_copyin, clause.getModifierList(),
+            /*structured=*/true,
+            /*implicit=*/false);
     } else if constexpr (isCombinedType<OpTy>) {
       applyToComputeOp(clause);
     } else {
-      // TODO: When we've implemented this for everything, switch this to an
-      // unreachable. declare construct remains.
- return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitCopyInClause"); } } diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index f86af4581c345..e9d20672ce185 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -450,7 +450,7 @@ void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E, // Therefore, we emit a trap which will abort the program, and // prompt a warning indicating that a trap will be emitted. const llvm::Triple &T = Target.getTriple(); - if (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())) { + if (CGM.getLangOpts().OpenMPIsTargetDevice && T.isGPU()) { EmitTrapCall(llvm::Intrinsic::trap); return; } @@ -627,7 +627,7 @@ void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) { // If we encounter a try statement on in an OpenMP target region offloaded to // a GPU, we treat it as a basic block. const bool IsTargetDevice = - (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())); + (CGM.getLangOpts().OpenMPIsTargetDevice && T.isGPU()); if (!IsTargetDevice) EnterCXXTryStmt(S); EmitStmt(S.getTryBlock()); diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index d281c4c20616a..bca7c30557f03 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -320,7 +320,7 @@ class ComplexExprEmitter QualType getPromotionType(FPOptionsOverride Features, QualType Ty, bool IsComplexDivisor) { if (auto *CT = Ty->getAs()) { - QualType ElementType = CT->getElementType(); + QualType ElementType = CT->getElementType().getCanonicalType(); bool IsFloatingType = ElementType->isFloatingType(); bool IsComplexRangePromoted = CGF.getLangOpts().getComplexRange() == LangOptions::ComplexRangeKind::CX_Promoted; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 2a5f3f6895609..f5c07fe2e33ff 100644 --- 
a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -731,13 +731,22 @@ llvm::Value *CGHLSLRuntime::emitSystemSemanticLoad( } if (SemanticName == "SV_POSITION") { - if (CGM.getTriple().getEnvironment() == Triple::EnvironmentType::Pixel) - return createSPIRVBuiltinLoad(B, CGM.getModule(), Type, - Semantic->getAttrName()->getName(), - /* BuiltIn::FragCoord */ 15); + if (CGM.getTriple().getEnvironment() == Triple::EnvironmentType::Pixel) { + if (CGM.getTarget().getTriple().isSPIRV()) + return createSPIRVBuiltinLoad(B, CGM.getModule(), Type, + Semantic->getAttrName()->getName(), + /* BuiltIn::FragCoord */ 15); + if (CGM.getTarget().getTriple().isDXIL()) + return emitDXILUserSemanticLoad(B, Type, Semantic, Index); + } + + if (CGM.getTriple().getEnvironment() == Triple::EnvironmentType::Vertex) { + return emitUserSemanticLoad(B, Type, Decl, Semantic, Index); + } } - llvm_unreachable("non-handled system semantic. FIXME."); + llvm_unreachable( + "Load hasn't been implemented yet for this system semantic. FIXME"); } static void createSPIRVBuiltinStore(IRBuilder<> &B, llvm::Module &M, @@ -760,12 +769,22 @@ void CGHLSLRuntime::emitSystemSemanticStore(IRBuilder<> &B, llvm::Value *Source, std::optional Index) { std::string SemanticName = Semantic->getAttrName()->getName().upper(); - if (SemanticName == "SV_POSITION") - createSPIRVBuiltinStore(B, CGM.getModule(), Source, - Semantic->getAttrName()->getName(), - /* BuiltIn::Position */ 0); - else - llvm_unreachable("non-handled system semantic. FIXME."); + if (SemanticName == "SV_POSITION") { + if (CGM.getTarget().getTriple().isDXIL()) { + emitDXILUserSemanticStore(B, Source, Semantic, Index); + return; + } + + if (CGM.getTarget().getTriple().isSPIRV()) { + createSPIRVBuiltinStore(B, CGM.getModule(), Source, + Semantic->getAttrName()->getName(), + /* BuiltIn::Position */ 0); + return; + } + } + + llvm_unreachable( + "Store hasn't been implemented yet for this system semantic. 
FIXME"); } llvm::Value *CGHLSLRuntime::handleScalarSemanticLoad( diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index cbc5931390376..9d7c851bead3e 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4360,8 +4360,7 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, // Set the flag to prevent the implementation from emitting device exception // handling code for those requiring so. - if ((Opts.OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())) || - Opts.OpenCLCPlusPlus) { + if ((Opts.OpenMPIsTargetDevice && T.isGPU()) || Opts.OpenCLCPlusPlus) { Opts.Exceptions = 0; Opts.CXXExceptions = 0; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 1a2fe0b3c17a7..cd14000c6d3df 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -10743,7 +10743,7 @@ static void DetectPrecisionLossInComplexDivision(Sema &S, QualType DivisorTy, if (!CT) return; - QualType ElementType = CT->getElementType(); + QualType ElementType = CT->getElementType().getCanonicalType(); bool IsComplexRangePromoted = S.getLangOpts().getComplexRange() == LangOptions::ComplexRangeKind::CX_Promoted; if (!ElementType->isFloatingType() || !IsComplexRangePromoted) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 0a164a7b5bbbd..ecab3946b58c7 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -771,9 +771,12 @@ void SemaHLSL::ActOnTopLevelFunction(FunctionDecl *FD) { } } -bool SemaHLSL::determineActiveSemanticOnScalar( - FunctionDecl *FD, DeclaratorDecl *OutputDecl, DeclaratorDecl *D, - SemanticInfo &ActiveSemantic, llvm::StringSet<> &UsedSemantics) { +bool SemaHLSL::determineActiveSemanticOnScalar(FunctionDecl *FD, + DeclaratorDecl *OutputDecl, + DeclaratorDecl *D, + SemanticInfo &ActiveSemantic, + llvm::StringSet<> &UsedSemantics, + bool IsInput) { if (ActiveSemantic.Semantic == 
nullptr) { ActiveSemantic.Semantic = D->getAttr(); if (ActiveSemantic.Semantic) @@ -792,7 +795,7 @@ bool SemaHLSL::determineActiveSemanticOnScalar( if (!A) return false; - checkSemanticAnnotation(FD, D, A); + checkSemanticAnnotation(FD, D, A, IsInput); OutputDecl->addAttr(A); unsigned Location = ActiveSemantic.Index.value_or(0); @@ -820,7 +823,8 @@ bool SemaHLSL::determineActiveSemantic(FunctionDecl *FD, DeclaratorDecl *OutputDecl, DeclaratorDecl *D, SemanticInfo &ActiveSemantic, - llvm::StringSet<> &UsedSemantics) { + llvm::StringSet<> &UsedSemantics, + bool IsInput) { if (ActiveSemantic.Semantic == nullptr) { ActiveSemantic.Semantic = D->getAttr(); if (ActiveSemantic.Semantic) @@ -833,12 +837,13 @@ bool SemaHLSL::determineActiveSemantic(FunctionDecl *FD, const RecordType *RT = dyn_cast(T); if (!RT) return determineActiveSemanticOnScalar(FD, OutputDecl, D, ActiveSemantic, - UsedSemantics); + UsedSemantics, IsInput); const RecordDecl *RD = RT->getDecl(); for (FieldDecl *Field : RD->fields()) { SemanticInfo Info = ActiveSemantic; - if (!determineActiveSemantic(FD, OutputDecl, Field, Info, UsedSemantics)) { + if (!determineActiveSemantic(FD, OutputDecl, Field, Info, UsedSemantics, + IsInput)) { Diag(Field->getLocation(), diag::note_hlsl_semantic_used_here) << Field; return false; } @@ -920,7 +925,7 @@ void SemaHLSL::CheckEntryPoint(FunctionDecl *FD) { // FIXME: Verify output semantics in parameters. 
if (!determineActiveSemantic(FD, Param, Param, ActiveSemantic, - ActiveInputSemantics)) { + ActiveInputSemantics, /* IsInput= */ true)) { Diag(Param->getLocation(), diag::note_previous_decl) << Param; FD->setInvalidDecl(); } @@ -932,12 +937,13 @@ void SemaHLSL::CheckEntryPoint(FunctionDecl *FD) { if (ActiveSemantic.Semantic) ActiveSemantic.Index = ActiveSemantic.Semantic->getSemanticIndex(); if (!FD->getReturnType()->isVoidType()) - determineActiveSemantic(FD, FD, FD, ActiveSemantic, ActiveOutputSemantics); + determineActiveSemantic(FD, FD, FD, ActiveSemantic, ActiveOutputSemantics, + /* IsInput= */ false); } void SemaHLSL::checkSemanticAnnotation( FunctionDecl *EntryPoint, const Decl *Param, - const HLSLAppliedSemanticAttr *SemanticAttr) { + const HLSLAppliedSemanticAttr *SemanticAttr, bool IsInput) { auto *ShaderAttr = EntryPoint->getAttr(); assert(ShaderAttr && "Entry point has no shader attribute"); llvm::Triple::EnvironmentType ST = ShaderAttr->getType(); @@ -961,11 +967,12 @@ void SemaHLSL::checkSemanticAnnotation( } if (SemanticName == "SV_POSITION") { - // TODO(#143523): allow use on other shader types & output once the overall - // semantic logic is implemented. - if (ST == llvm::Triple::Pixel) + // SV_Position can be an input or output in vertex shaders, + // but only an input in pixel shaders. 
+ if (ST == llvm::Triple::Vertex || (ST == llvm::Triple::Pixel && IsInput)) return; - DiagnoseAttrStageMismatch(SemanticAttr, ST, {llvm::Triple::Pixel}); + DiagnoseAttrStageMismatch(SemanticAttr, ST, + {llvm::Triple::Pixel, llvm::Triple::Vertex}); return; } diff --git a/clang/test/AST/HLSL/semantic-input-struct-shadow.hlsl b/clang/test/AST/HLSL/semantic-input-struct-shadow.hlsl new file mode 100644 index 0000000000000..d4d89bd5d26ba --- /dev/null +++ b/clang/test/AST/HLSL/semantic-input-struct-shadow.hlsl @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.8-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s + + +// CHECK: CXXRecordDecl {{.*}} referenced struct S definition +// CHECK: FieldDecl {{.*}} field1 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "A" 0 +// CHECK: FieldDecl {{.*}} field2 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "B" 4 + +struct S { + int field1 : A; + int field2 : B4; +}; + +// CHECK: FunctionDecl {{.*}} main 'void (S)' +// CHECK-NEXT: ParmVarDecl {{.*}} s 'S' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "C" 0 +// CHECK-NEXT: HLSLAppliedSemanticAttr {{.*}} "C" 0 +// CHECK-NEXT: HLSLAppliedSemanticAttr {{.*}} "C" 1 +void main(S s : C) {} diff --git a/clang/test/AST/HLSL/semantic-input-struct.hlsl b/clang/test/AST/HLSL/semantic-input-struct.hlsl new file mode 100644 index 0000000000000..d71fdcff631f4 --- /dev/null +++ b/clang/test/AST/HLSL/semantic-input-struct.hlsl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.8-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s + + +// CHECK: CXXRecordDecl {{.*}} referenced struct S definition +// CHECK: FieldDecl {{.*}} field1 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "A" 0 +// CHECK: 
FieldDecl {{.*}} field2 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "B" 4 + +struct S { + int field1 : A; + int field2 : B4; +}; + +// CHECK: FunctionDecl {{.*}} main 'void (S)' +// CHECK-NEXT: ParmVarDecl {{.*}} s 'S' +// CHECK-NEXT: HLSLAppliedSemanticAttr {{.*}} "A" 0 +// CHECK-NEXT: HLSLAppliedSemanticAttr {{.*}} "B" 4 +void main(S s) {} diff --git a/clang/test/AST/HLSL/semantic-input.hlsl b/clang/test/AST/HLSL/semantic-input.hlsl new file mode 100644 index 0000000000000..4dc3ab9db7392 --- /dev/null +++ b/clang/test/AST/HLSL/semantic-input.hlsl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.8-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s + +// CHECK: ParmVarDecl {{.*}} a 'float4':'vector' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "ABC" 0 +// CHECK-NEXT: HLSLAppliedSemanticAttr {{.*}} "ABC" 0 + +void main(float4 a : ABC) { +} diff --git a/clang/test/AST/HLSL/semantic-output-struct-shadow.hlsl b/clang/test/AST/HLSL/semantic-output-struct-shadow.hlsl new file mode 100644 index 0000000000000..e83901bb17943 --- /dev/null +++ b/clang/test/AST/HLSL/semantic-output-struct-shadow.hlsl @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.8-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s + + +// CHECK: CXXRecordDecl {{.*}} referenced struct S definition +// CHECK: FieldDecl {{.*}} referenced field1 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "A" 0 +// CHECK: FieldDecl {{.*}} referenced field2 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "B" 4 + +struct S { + int field1 : A; + int field2 : B4; +}; + +// CHECK: FunctionDecl {{.*}} main 'S ()' +// CHECK: HLSLParsedSemanticAttr {{.*}} "DEF" 0 +// CHECK: HLSLAppliedSemanticAttr {{.*}} "DEF" 0 +// 
CHECK-NEXT: HLSLAppliedSemanticAttr {{.*}} "DEF" 1 +S main() : DEF { + S tmp; + return tmp; +} diff --git a/clang/test/AST/HLSL/semantic-output-struct.hlsl b/clang/test/AST/HLSL/semantic-output-struct.hlsl new file mode 100644 index 0000000000000..727c0f3040641 --- /dev/null +++ b/clang/test/AST/HLSL/semantic-output-struct.hlsl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.8-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s + + +// CHECK: CXXRecordDecl {{.*}} referenced struct S definition +// CHECK: FieldDecl {{.*}} referenced field1 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "A" 0 +// CHECK: FieldDecl {{.*}} referenced field2 'int' +// CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "B" 4 + +struct S { + int field1 : A; + int field2 : B4; +}; + +// CHECK: FunctionDecl {{.*}} main 'S ()' +// CHECK: HLSLAppliedSemanticAttr {{.*}} "A" 0 +// CHECK-NEXT: HLSLAppliedSemanticAttr {{.*}} "B" 4 +S main() { + S tmp; + return tmp; +} diff --git a/clang/test/AST/HLSL/semantic-output.hlsl b/clang/test/AST/HLSL/semantic-output.hlsl new file mode 100644 index 0000000000000..63429387f8d66 --- /dev/null +++ b/clang/test/AST/HLSL/semantic-output.hlsl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.8-vertex -finclude-default-header -ast-dump -o - %s | FileCheck %s + +// CHECK: FunctionDecl {{.*}} main 'uint ()' +// CHECK: HLSLParsedSemanticAttr {{.*}} "ABC" 0 +// CHECK: HLSLAppliedSemanticAttr {{.*}} "ABC" 0 +uint main() : ABC { + return 0; +} diff --git a/clang/test/CIR/CodeGenOpenACC/declare-copy.cpp b/clang/test/CIR/CodeGenOpenACC/declare-copy.cpp new file mode 100644 index 0000000000000..a8a9115a21b29 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-copy.cpp @@ -0,0 +1,199 @@ +// RUN: 
%clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. + + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare copy(always:ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : 
!cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, 
!cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_COPYIN]] : !cir.ptr) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_COPYIN]] : !cir.ptr) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_COPYIN]] : !cir.ptr) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_COPYIN]] : !cir.ptr) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // 
CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare copy(alwaysin:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare copy(alwaysout:LocalHSE, 
LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_COPYIN]] : !cir.ptr) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_COPYIN]] : !cir.ptr) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: 
acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_COPYIN]] : !cir.ptr) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_COPYIN]] : !cir.ptr) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare copy(capture:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin 
varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare copy(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: 
%[[LOC_HSE_ARR_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {dataClause = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_COPYIN]] : !cir.ptr) to varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_INT_COPYIN]] : !cir.ptr) to varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[LOC_HSE_ARR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND2]]) to varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. 
+ do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_COPYIN]] : !cir.ptr) to varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_INT_COPYIN]] : !cir.ptr) to varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.copyout accPtr(%[[ARG_HSE_PTR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND1]]) to varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + diff --git a/clang/test/CIR/CodeGenOpenACC/declare-copyin.cpp b/clang/test/CIR/CodeGenOpenACC/declare-copyin.cpp new file mode 100644 index 0000000000000..1ed7a7d101adb --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/declare-copyin.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct HasSideEffects { + HasSideEffects(); + ~HasSideEffects(); +}; + +// TODO: OpenACC: Implement 'global', NS lowering. + +struct Struct { + static const HasSideEffects StaticMemHSE; + static const HasSideEffects StaticMemHSEArr[5]; + static const int StaticMemInt; + + // TODO: OpenACC: Implement static-local lowering. 
+ + void MemFunc1(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc1{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + int LocalInt; + +#pragma acc declare copyin(always:ArgHSE, ArgInt, LocalHSE, LocalInt, ArgHSEPtr[1:1], LocalHSEArr[1:1]) + // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER]]) dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr, !cir.ptr>, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = 
"ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + } + void MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr); +}; + +void use() { + Struct s; + s.MemFunc1(HasSideEffects{}, 0, nullptr); +} + +void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}MemFunc2{{.*}}(%{{.*}}: !cir.ptr{{.*}}, %[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: cir.alloca{{.*}}["this" + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.load + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare copyin(alwaysin:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : 
!cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + +#pragma acc declare copyin(alwaysin:LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) 
startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {modifiers = #acc, name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, modifiers = #acc, name = "LocalHSEArr[1:1]"} + // + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + +extern "C" void do_thing(); + +extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEPtr) { + // CHECK: cir.func {{.*}}NormalFunc(%[[ARG_HSE:.*]]: !rec_HasSideEffects{{.*}}, %[[ARG_INT:.*]]: !s32i {{.*}}, %[[ARG_HSE_PTR:.*]]: !cir.ptr{{.*}}) + // CHECK-NEXT: %[[ARG_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["ArgHSE" + // CHECK-NEXT: %[[ARG_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["ArgInt + // CHECK-NEXT: %[[ARG_HSE_PTR_ALLOCA:.*]] = 
cir.alloca !cir.ptr{{.*}}["ArgHSEPtr" + // CHECK-NEXT: %[[LOC_HSE_ALLOCA:.*]] = cir.alloca !rec_HasSideEffects{{.*}}["LocalHSE + // CHECK-NEXT: %[[LOC_HSE_ARR_ALLOCA:.*]] = cir.alloca !cir.array{{.*}}["LocalHSEArr + // CHECK-NEXT: %[[LOC_INT_ALLOCA:.*]] = cir.alloca !s32i{{.*}}["LocalInt + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + // CHECK-NEXT: cir.store + HasSideEffects LocalHSE; + // CHECK-NEXT: cir.call{{.*}} : (!cir.ptr) -> () + HasSideEffects LocalHSEArr[5]; + // CHECK: do { + // CHECK: } while { + // CHECK: } + int LocalInt; +#pragma acc declare copyin(always:ArgHSE, ArgInt, ArgHSEPtr[1:1]) + // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} + // CHECK-NEXT: %[[ENTER1:.*]] = acc.declare_enter dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + { + // CHECK-NEXT: cir.scope { +#pragma acc declare copyin(LocalHSE, LocalInt, LocalHSEArr[1:1]) + // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr 
{name = "LocalHSE"} + // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 + // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 + // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) + // CHECK-NEXT: %[[LOC_HSE_ARR_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ARR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND2]]) -> !cir.ptr> {name = "LocalHSEArr[1:1]"} + // CHECK-NEXT: %[[ENTER2:.*]] = acc.declare_enter dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER2]]) dataOperands(%[[LOC_HSE_COPYIN]], %[[LOC_INT_COPYIN]], %[[LOC_HSE_ARR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_COPYIN]] : !cir.ptr) {dataClause = #acc, name = "LocalHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_INT_COPYIN]] : !cir.ptr) {dataClause = #acc, name = "LocalInt"} + // CHECK-NEXT: acc.delete accPtr(%[[LOC_HSE_ARR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND2]]) {dataClause = #acc, name = "LocalHSEArr[1:1]"} + } + // CHECK-NEXT: } + + // Make sure that cleanup gets put in the right scope. 
+ do_thing(); + // CHECK-NEXT: cir.call @do_thing + // CHECK-NEXT: acc.declare_exit token(%[[ENTER1]]) dataOperands(%[[ARG_HSE_COPYIN]], %[[ARG_INT_COPYIN]], %[[ARG_HSE_PTR_COPYIN]] : !cir.ptr, !cir.ptr, !cir.ptr>) + + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_INT_COPYIN]] : !cir.ptr) {dataClause = #acc, modifiers = #acc, name = "ArgInt"} + // CHECK-NEXT: acc.delete accPtr(%[[ARG_HSE_PTR_COPYIN]] : !cir.ptr>) bounds(%[[BOUND1]]) {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} +} + diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 13ad0545ab53f..6a884e98e9f3b 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -321,10 +321,10 @@ __m256i test_mm256_cmpeq_epi8(__m256i a, __m256i b) { // CHECK: icmp eq <32 x i8> return _mm256_cmpeq_epi8(a, b); } -TEST_CONSTEXPR(match_v16qi(_mm_cmpeq_epi8( - (__m128i)(__v16qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16}, - (__m128i)(__v16qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16}), - 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1)); +TEST_CONSTEXPR(match_v32qi(_mm256_cmpeq_epi8( + (__m256i)(__v32qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16,-16,15,-14,13,-12,11,-10,9,-8,7,-6,5,4,-3,2,-1}, + (__m256i)(__v32qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16,10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16}), + 0, -1, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_cmpeq_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_cmpeq_epi16 diff --git a/clang/test/CodeGen/promoted-complex-div.c b/clang/test/CodeGen/promoted-complex-div.c index 7ed7b07db83ae..006b5e334e6ea 100644 --- a/clang/test/CodeGen/promoted-complex-div.c +++ b/clang/test/CodeGen/promoted-complex-div.c @@ -81,3 +81,55 @@ _Complex double divf(_Complex double 
a, _Complex double b) { return a / b; // nopromotion-warning{{excess precision is requested but the target does not support excess precision which may result in observable differences in complex division behavior}} } + +// This test ensures that Clang does not crash when complex element types +// require desugaring under -complex-range=promoted. Previously, a sugared +// typedef element type (e.g., 'typedef double a') caused a crash during +// complex range evaluation in both Sema and CodeGen. +typedef double a; +_Complex double *b; +// CHECK-LABEL: define dso_local void @DivideByComplexZero +void DivideByComplexZero() { + // CHECK: fpext double {{.*}} to x86_fp80 + // CHECK: fpext double {{.*}} to x86_fp80 + // CHECK: fmul x86_fp80 + // CHECK: fmul x86_fp80 + // CHECK: fadd x86_fp80 + // CHECK: fmul x86_fp80 + // CHECK: fmul x86_fp80 + // CHECK: fsub x86_fp80 + // CHECK: fdiv x86_fp80 + // CHECK: fdiv x86_fp80 + // CHECK: fptrunc x86_fp80 + // CHECK: fptrunc x86_fp80 + + // NOX87: call double @llvm.fabs.f64(double {{.*}}) + // NOX87-NEXT: call double @llvm.fabs.f64(double {{.*}} + // NOX87-NEXT: fcmp ugt double {{.*}}, {{.*}} + // NOX87-NEXT: br i1 {{.*}}, label + // NOX87: abs_rhsr_greater_or_equal_abs_rhsi: + // NOX87-NEXT: fmul double + // NOX87-NEXT: fadd double + // NOX87-NEXT: fdiv double + // NOX87-NEXT: fmul double + // NOX87-NEXT: fsub double + // NOX87-NEXT: fdiv double + // NOX87-NEXT: br label {{.*}} + // NOX87: abs_rhsr_less_than_abs_rhsi: + // NOX87-NEXT: fmul double + // NOX87-NEXT: fadd double + // NOX87-NEXT: fdiv double + // NOX87-NEXT: fmul double + // NOX87-NEXT: fsub double + // NOX87-NEXT: fdiv double + // NOX87-NEXT: br label {{.*}} + // NOX87: complex_div: + // NOX87-NEXT: phi double + // NOX87-NEXT: phi double + // NOX87-NEXT: getelementptr inbounds nuw { double, double }, ptr {{.*}}, i32 0, i32 0 + // NOX87-NEXT: getelementptr inbounds nuw { double, double }, ptr {{.*}}, i32 0, i32 1 + // NOX87-NEXT: store double + // NOX87-NEXT: store 
double + + *b /= 1.0iF * (a)0; +} diff --git a/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl b/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl index be30e79438831..b7d2283ea7766 100644 --- a/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl +++ b/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl @@ -1,11 +1,21 @@ -// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-pixel -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple spirv-pc-vulkan1.3-pixel -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-pixel -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-DXIL -// CHECK: @SV_Position = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 +// CHECK-SPIRV: @SV_Position = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations ![[#MD_0:]] // CHECK: define void @main() {{.*}} { float4 main(float4 p : SV_Position) : A { - // CHECK: %[[#P:]] = load <4 x float>, ptr addrspace(7) @SV_Position, align 16 - // CHECK: %[[#R:]] = call spir_func <4 x float> @_Z4mainDv4_f(<4 x float> %[[#P]]) - // CHECK: store <4 x float> %[[#R]], ptr addrspace(8) @A0, align 16 + // CHECK-SPIRV: %[[#P:]] = load <4 x float>, ptr addrspace(7) @SV_Position, align 16 + // CHECK-SPIRV: %[[#R:]] = call spir_func <4 x float> @_Z4mainDv4_f(<4 x float> %[[#P]]) + // CHECK-SPIRV: store <4 x float> %[[#R]], ptr addrspace(8) @A0, align 16 + + // CHECK-DXIL: %SV_Position0 = call <4 x float> @llvm.dx.load.input.v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison) + // CHECK-DXIL: %[[#TMP:]] = call <4 x float> @_Z4mainDv4_f(<4 x float> %SV_Position0) + // CHECK-DXIL: call void @llvm.dx.store.output.v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison, <4 x float> %[[#TMP]]) 
return p; } + +// CHECK-SPIRV-DAG: ![[#MD_0]] = !{![[#MD_1:]]} +// CHECK-SPIRV-DAG: ![[#MD_1]] = !{i32 11, i32 15} +// | `-> BuiltIn Position +// `-> SPIR-V decoration 'FragCoord' diff --git a/clang/test/CodeGenHLSL/semantics/SV_Position.vs.hlsl b/clang/test/CodeGenHLSL/semantics/SV_Position.vs.hlsl new file mode 100644 index 0000000000000..0156c0bb816c1 --- /dev/null +++ b/clang/test/CodeGenHLSL/semantics/SV_Position.vs.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple dxil-unknown-shadermodel6.8-vertex -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck --check-prefix=CHECK-DXIL %s +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-vertex -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck --check-prefix=CHECK-SPIRV %s + +// CHECK-SPIRV: @SV_Position0 = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations ![[#MD_0:]] +// CHECK-SPIRV: @SV_Position = external hidden thread_local addrspace(8) global <4 x float>, !spirv.Decorations ![[#MD_2:]] + +// CHECK: define void @main() {{.*}} { +float4 main(float4 p : SV_Position) : SV_Position { + // CHECK-SPIRV: %[[#P:]] = load <4 x float>, ptr addrspace(7) @SV_Position0, align 16 + // CHECK-SPIRV: %[[#R:]] = call spir_func <4 x float> @_Z4mainDv4_f(<4 x float> %[[#P]]) + // CHECK-SPIRV: store <4 x float> %[[#R]], ptr addrspace(8) @SV_Position, align 16 + + // CHECK-DXIL: %SV_Position0 = call <4 x float> @llvm.dx.load.input.v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison) + // CHECK-DXIL: %[[#TMP:]] = call <4 x float> @_Z4mainDv4_f(<4 x float> %SV_Position0) + // CHECK-DXIL: call void @llvm.dx.store.output.v4f32(i32 4, i32 0, i32 0, i8 0, i32 poison, <4 x float> %[[#TMP]]) + return p; +} + +// CHECK-SPIRV-DAG: ![[#MD_0]] = !{![[#MD_1:]]} +// CHECK-SPIRV-DAG: ![[#MD_2]] = !{![[#MD_3:]]} +// CHECK-SPIRV-DAG: ![[#MD_1]] = !{i32 30, i32 0} +// | `-> Location 0 +// `-> SPIR-V decoration 'Location' +// 
CHECK-SPIRV-DAG: ![[#MD_3]] = !{i32 11, i32 0} +// | `-> BuiltIn Position +// `-> SPIR-V decoration 'BuiltIn' diff --git a/clang/test/OpenMP/spirv_target_codegen_noexceptions.cpp b/clang/test/OpenMP/spirv_target_codegen_noexceptions.cpp new file mode 100644 index 0000000000000..42f8f3ea70f7d --- /dev/null +++ b/clang/test/OpenMP/spirv_target_codegen_noexceptions.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fexceptions -fcxx-exceptions -Wno-openmp-target-exception -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -fexceptions -fcxx-exceptions -Wno-openmp-target-exception -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | \ +// RUN: FileCheck -implicit-check-not='{{invoke|throw|cxa}}' %s +void foo() { + // CHECK: call addrspace(9) void @llvm.trap() + // CHECK-NEXT: call spir_func addrspace(9) void @__kmpc_target_deinit() + #pragma omp target + throw "bad"; +} diff --git a/clang/test/SemaHLSL/Semantics/position.ps.hlsl b/clang/test/SemaHLSL/Semantics/position.ps.hlsl index 2d02384821d90..47d07887911d6 100644 --- a/clang/test/SemaHLSL/Semantics/position.ps.hlsl +++ b/clang/test/SemaHLSL/Semantics/position.ps.hlsl @@ -1,13 +1,7 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-pixel -x hlsl -finclude-default-header -o - %s -ast-dump | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-pixel -finclude-default-header -x hlsl -verify -o - %s +// RUN: %clang_cc1 -triple spirv-pc-vulkan1.3-pixel -finclude-default-header -x hlsl -verify -o - %s -// FIXME(Keenuts): change output semantic to something valid for pixels shaders -float4 main(float4 a : SV_Position2) : A { -// CHECK: FunctionDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> line:[[@LINE-1]]:8 main 'float4 (float4)' -// CHECK-NEXT: ParmVarDecl 0x{{[0-9a-fA-F]+}} <{{.*}}> col:20 used a 'float4':'vector' -// CHECK-NEXT: HLSLParsedSemanticAttr 
0x{{[0-9a-f]+}} "SV_Position" 2 -// CHECK-NEXT: HLSLAppliedSemanticAttr 0x{{[0-9a-f]+}} "SV_Position" 2 - -// CHECK: HLSLParsedSemanticAttr 0x{{[0-9a-f]+}} "A" 0 -// CHECK: HLSLAppliedSemanticAttr 0x{{[0-9a-f]+}} "A" 0 +float4 main(float4 a : A) : SV_Position { +// expected-error@-1 {{attribute 'SV_Position' is unsupported in 'pixel' shaders, requires one of the following: pixel, vertex}} return a; } diff --git a/clang/test/SemaHLSL/Semantics/position.vs.hlsl b/clang/test/SemaHLSL/Semantics/position.vs.hlsl deleted file mode 100644 index 9d0ff285ce055..0000000000000 --- a/clang/test/SemaHLSL/Semantics/position.vs.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-vertex -x hlsl -finclude-default-header -o - %s -verify - -// expected-error@+1 {{attribute 'SV_Position' is unsupported in 'vertex' shaders, requires pixel}} -float4 main(float4 a : SV_Position) : A { - return a; -} diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 272e52d68f46a..56c45d0d46575 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -732,50 +732,52 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void assign(size_type __n, const value_type& __v); - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { return allocator_type(this->__alloc_); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return iterator(__base::__before_begin()->__next_); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return 
const_iterator(__base::__before_begin()->__next_); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return iterator(nullptr); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { + return iterator(nullptr); + } + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return const_iterator(nullptr); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return const_iterator(__base::__before_begin()->__next_); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return const_iterator(nullptr); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator before_begin() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator before_begin() _NOEXCEPT { return iterator(__base::__before_begin()); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator before_begin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator before_begin() const _NOEXCEPT { return const_iterator(__base::__before_begin()); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbefore_begin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbefore_begin() const _NOEXCEPT { return const_iterator(__base::__before_begin()); } [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __base::__before_begin()->__next_ == nullptr; } - 
_LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return std::min(__node_traits::max_size(this->__alloc_), numeric_limits::max()); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference front() { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference front() { _LIBCPP_ASSERT_NON_NULL(!empty(), "forward_list::front called on an empty list"); return __base::__before_begin()->__next_->__get_value(); } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference front() const { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference front() const { _LIBCPP_ASSERT_NON_NULL(!empty(), "forward_list::front called on an empty list"); return __base::__before_begin()->__next_->__get_value(); } diff --git a/libcxx/test/libcxx/diagnostics/forward_list.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/forward_list.nodiscard.verify.cpp index 7594a1d299a50..671c7f71ab2a2 100644 --- a/libcxx/test/libcxx/diagnostics/forward_list.nodiscard.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/forward_list.nodiscard.verify.cpp @@ -13,6 +13,27 @@ #include void test() { - std::forward_list forward_list; - forward_list.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::forward_list fl; + const std::forward_list cfl; + + fl.get_allocator(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + fl.begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfl.begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + fl.end(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfl.end(); // expected-warning {{ignoring return 
value of function declared with 'nodiscard' attribute}} + fl.cbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfl.cbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + fl.cend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfl.cend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + fl.before_begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfl.before_begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + fl.cbefore_begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfl.cbefore_begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + fl.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + fl.max_size(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + fl.front(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfl.front(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} } diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf index 70eda3bf40650..ccd2b00223910 100644 --- a/lldb/docs/dil-expr-lang.ebnf +++ b/lldb/docs/dil-expr-lang.ebnf @@ -8,7 +8,7 @@ expression = unary_expression ; unary_expression = postfix_expression | unary_operator expression ; -unary_operator = "*" | "&" ; +unary_operator = "*" | "&" | "+" | "-"; postfix_expression = primary_expression | postfix_expression "[" integer_literal "]" diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index 25b208a65349b..99ea0585e5370 100644 --- 
a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -411,6 +411,18 @@ class TypeSystem : public PluginInterface, GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx, bool expand_pack); + // DIL + + /// Checks if the type is eligible for integral promotion. + virtual bool IsPromotableIntegerType(lldb::opaque_compiler_type_t type); + + /// Perform integral promotion on a given type. + /// This promotes eligible types (boolean, integers, unscoped enumerations) + /// to a larger integer type according to type system rules. + /// \returns Promoted type. + virtual llvm::Expected + DoIntegralPromotion(CompilerType from, ExecutionContextScope *exe_scope); + // Dumping types #ifndef NDEBUG diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h index 0f05d753f1b56..91f8d93c09622 100644 --- a/lldb/include/lldb/ValueObject/DILAST.h +++ b/lldb/include/lldb/ValueObject/DILAST.h @@ -33,6 +33,8 @@ enum class NodeKind { enum class UnaryOpKind { AddrOf, // "&" Deref, // "*" + Minus, // "-" + Plus, // "+" }; /// Forward declaration, for use in DIL AST nodes. Definition is at the very diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h index eab3218ff828f..a65edc58cc4e7 100644 --- a/lldb/include/lldb/ValueObject/DILEval.h +++ b/lldb/include/lldb/ValueObject/DILEval.h @@ -61,6 +61,10 @@ class Interpreter : Visitor { llvm::Expected Visit(const BooleanLiteralNode *node) override; + /// Perform usual unary conversions on a value. At the moment this + /// includes array-to-pointer and integral promotion for eligible types. 
+ llvm::Expected + UnaryConversion(lldb::ValueObjectSP valobj, uint32_t location); llvm::Expected PickIntegerType(lldb::TypeSystemSP type_system, std::shared_ptr ctx, diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 51cb883748514..aa8d309fbc730 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -7346,6 +7346,102 @@ CompilerType TypeSystemClang::GetTypeForFormatters(void *type) { return CompilerType(); } +bool TypeSystemClang::IsPromotableIntegerType( + lldb::opaque_compiler_type_t type) { + // Unscoped enums are always considered as promotable, even if their + // underlying type does not need to be promoted (e.g. "int"). + bool is_signed = false; + bool isUnscopedEnumerationType = + IsEnumerationType(type, is_signed) && !IsScopedEnumerationType(type); + if (isUnscopedEnumerationType) + return true; + + switch (GetBasicTypeEnumeration(type)) { + case lldb::eBasicTypeBool: + case lldb::eBasicTypeChar: + case lldb::eBasicTypeSignedChar: + case lldb::eBasicTypeUnsignedChar: + case lldb::eBasicTypeShort: + case lldb::eBasicTypeUnsignedShort: + case lldb::eBasicTypeWChar: + case lldb::eBasicTypeSignedWChar: + case lldb::eBasicTypeUnsignedWChar: + case lldb::eBasicTypeChar16: + case lldb::eBasicTypeChar32: + return true; + + default: + return false; + } + + llvm_unreachable("All cases handled above."); +} + +llvm::Expected +TypeSystemClang::DoIntegralPromotion(CompilerType from, + ExecutionContextScope *exe_scope) { + if (!from.IsInteger() && !from.IsUnscopedEnumerationType()) + return from; + + if (!from.IsPromotableIntegerType()) + return from; + + if (from.IsUnscopedEnumerationType()) { + EnumDecl *enum_decl = GetAsEnumDecl(from); + CompilerType promotion_type = GetType(enum_decl->getPromotionType()); + return DoIntegralPromotion(promotion_type, exe_scope); + } + + lldb::BasicType builtin_type = + 
from.GetCanonicalType().GetBasicTypeEnumeration(); + uint64_t from_size = 0; + if (builtin_type == lldb::eBasicTypeWChar || + builtin_type == lldb::eBasicTypeSignedWChar || + builtin_type == lldb::eBasicTypeUnsignedWChar || + builtin_type == lldb::eBasicTypeChar16 || + builtin_type == lldb::eBasicTypeChar32) { + // Find the type that can hold the entire range of values for our type. + bool is_signed = from.IsSigned(); + llvm::Expected from_size = from.GetByteSize(exe_scope); + if (!from_size) + return from_size.takeError(); + CompilerType promote_types[] = { + GetBasicTypeFromAST(lldb::eBasicTypeInt), + GetBasicTypeFromAST(lldb::eBasicTypeUnsignedInt), + GetBasicTypeFromAST(lldb::eBasicTypeLong), + GetBasicTypeFromAST(lldb::eBasicTypeUnsignedLong), + GetBasicTypeFromAST(lldb::eBasicTypeLongLong), + GetBasicTypeFromAST(lldb::eBasicTypeUnsignedLongLong), + }; + for (CompilerType &type : promote_types) { + llvm::Expected byte_size = type.GetByteSize(exe_scope); + if (!byte_size) + return byte_size.takeError(); + if (*from_size < *byte_size || + (*from_size == *byte_size && is_signed == type.IsSigned())) { + return type; + } + } + llvm_unreachable("char type should fit into long long"); + } + + // Here we can promote only to "int" or "unsigned int". + CompilerType int_type = GetBasicTypeFromAST(lldb::eBasicTypeInt); + llvm::Expected int_byte_size = int_type.GetByteSize(exe_scope); + if (!int_byte_size) + return int_byte_size.takeError(); + + // Signed integer types can be safely promoted to "int". + if (from.IsSigned()) { + return int_type; + } + // Unsigned integer types are promoted to "unsigned int" if "int" cannot hold + // their entire value range. + return (from_size == *int_byte_size) + ? 
GetBasicTypeFromAST(lldb::eBasicTypeUnsignedInt) + : int_type; +} + clang::EnumDecl *TypeSystemClang::GetAsEnumDecl(const CompilerType &type) { const clang::EnumType *enutype = llvm::dyn_cast(ClangUtil::GetCanonicalQualType(type)); diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 375891b3cfd2f..67d206e4d2df2 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -938,6 +938,14 @@ class TypeSystemClang : public TypeSystem { CompilerType GetTypeForFormatters(void *type) override; + // DIL + + bool IsPromotableIntegerType(lldb::opaque_compiler_type_t type) override; + + llvm::Expected + DoIntegralPromotion(CompilerType from, + ExecutionContextScope *exe_scope) override; + #define LLDB_INVALID_DECL_LEVEL UINT32_MAX // LLDB_INVALID_DECL_LEVEL is returned by CountDeclLevels if child_decl_ctx // could not be found in decl_ctx. diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index c999ab256fc98..1a39ea9476390 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -370,30 +370,10 @@ bool CompilerType::IsScalarOrUnscopedEnumerationType() const { } bool CompilerType::IsPromotableIntegerType() const { - // Unscoped enums are always considered as promotable, even if their - // underlying type does not need to be promoted (e.g. "int"). 
- if (IsUnscopedEnumerationType()) - return true; - - switch (GetBasicTypeEnumeration()) { - case lldb::eBasicTypeBool: - case lldb::eBasicTypeChar: - case lldb::eBasicTypeSignedChar: - case lldb::eBasicTypeUnsignedChar: - case lldb::eBasicTypeShort: - case lldb::eBasicTypeUnsignedShort: - case lldb::eBasicTypeWChar: - case lldb::eBasicTypeSignedWChar: - case lldb::eBasicTypeUnsignedWChar: - case lldb::eBasicTypeChar16: - case lldb::eBasicTypeChar32: - return true; - - default: - return false; - } - - llvm_unreachable("All cases handled above."); + if (IsValid()) + if (auto type_system_sp = GetTypeSystem()) + return type_system_sp->IsPromotableIntegerType(m_type); + return false; } bool CompilerType::IsPointerToVoid() const { diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index f7d634ffa2dec..8712142893835 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -123,6 +123,17 @@ CompilerType TypeSystem::GetTypeForFormatters(void *type) { return CompilerType(weak_from_this(), type); } +bool TypeSystem::IsPromotableIntegerType(lldb::opaque_compiler_type_t type) { + return false; +} + +llvm::Expected +TypeSystem::DoIntegralPromotion(CompilerType from, + ExecutionContextScope *exe_scope) { + return llvm::createStringError( + "Integral promotion is not implemented for this TypeSystem"); +} + bool TypeSystem::IsTemplateType(lldb::opaque_compiler_type_t type) { return false; } diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index a9dbfad298d05..40a05a467f883 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -21,6 +21,101 @@ namespace lldb_private::dil { +static llvm::Expected +GetTypeSystemFromCU(std::shared_ptr ctx) { + auto stack_frame = ctx->CalculateStackFrame(); + if (!stack_frame) + return llvm::createStringError("no stack frame in this context"); + SymbolContext symbol_context = + 
stack_frame->GetSymbolContext(lldb::eSymbolContextCompUnit); + lldb::LanguageType language = symbol_context.comp_unit->GetLanguage(); + + symbol_context = stack_frame->GetSymbolContext(lldb::eSymbolContextModule); + return symbol_context.module_sp->GetTypeSystemForLanguage(language); +} + +static CompilerType GetBasicType(lldb::TypeSystemSP type_system, + lldb::BasicType basic_type) { + if (type_system) + return type_system.get()->GetBasicTypeFromAST(basic_type); + + return CompilerType(); +} + +static lldb::ValueObjectSP +ArrayToPointerConversion(ValueObject &valobj, ExecutionContextScope &ctx) { + uint64_t addr = valobj.GetLoadAddress(); + ExecutionContext exe_ctx; + ctx.CalculateExecutionContext(exe_ctx); + return ValueObject::CreateValueObjectFromAddress( + "result", addr, exe_ctx, + valobj.GetCompilerType().GetArrayElementType(&ctx).GetPointerType(), + /* do_deref */ false); +} + +llvm::Expected +Interpreter::UnaryConversion(lldb::ValueObjectSP valobj, uint32_t location) { + if (!valobj) + return llvm::make_error(m_expr, "invalid value object", + location); + llvm::Expected type_system = + GetTypeSystemFromCU(m_exe_ctx_scope); + if (!type_system) + return type_system.takeError(); + + CompilerType in_type = valobj->GetCompilerType(); + if (valobj->IsBitfield()) { + // Promote bitfields. If `int` can represent the bitfield value, it is + // converted to `int`. Otherwise, if `unsigned int` can represent it, it + // is converted to `unsigned int`. Otherwise, it is treated as its + // underlying type. + uint32_t bitfield_size = valobj->GetBitfieldBitSize(); + // Some bitfields have undefined size (e.g. result of ternary operation). + // The AST's `bitfield_size` of those is 0, and no promotion takes place. 
+ if (bitfield_size > 0 && in_type.IsInteger()) { + CompilerType int_type = GetBasicType(*type_system, lldb::eBasicTypeInt); + CompilerType uint_type = + GetBasicType(*type_system, lldb::eBasicTypeUnsignedInt); + llvm::Expected int_bit_size = + int_type.GetBitSize(m_exe_ctx_scope.get()); + if (!int_bit_size) + return int_bit_size.takeError(); + llvm::Expected uint_bit_size = + uint_type.GetBitSize(m_exe_ctx_scope.get()); + if (!uint_bit_size) + return int_bit_size.takeError(); + if (bitfield_size < *int_bit_size || + (in_type.IsSigned() && bitfield_size == *int_bit_size)) + return valobj->CastToBasicType(int_type); + if (bitfield_size <= *uint_bit_size) + return valobj->CastToBasicType(uint_type); + // Re-create as a const value with the same underlying type + Scalar scalar; + bool resolved = valobj->ResolveValue(scalar); + if (!resolved) + return llvm::createStringError("invalid scalar value"); + return ValueObject::CreateValueObjectFromScalar(m_target, scalar, in_type, + "result"); + } + } + + if (in_type.IsArrayType()) + valobj = ArrayToPointerConversion(*valobj, *m_exe_ctx_scope); + + if (valobj->GetCompilerType().IsInteger() || + valobj->GetCompilerType().IsUnscopedEnumerationType()) { + llvm::Expected promoted_type = + type_system.get()->DoIntegralPromotion(valobj->GetCompilerType(), + m_exe_ctx_scope.get()); + if (!promoted_type) + return promoted_type.takeError(); + if (!promoted_type->CompareTypes(valobj->GetCompilerType())) + return valobj->CastToBasicType(*promoted_type); + } + + return valobj; +} + static lldb::VariableSP DILFindVariable(ConstString name, VariableList &variable_list) { lldb::VariableSP exact_match; @@ -147,6 +242,10 @@ Interpreter::Interpreter(lldb::TargetSP target, llvm::StringRef expr, llvm::Expected Interpreter::Evaluate(const ASTNode *node) { // Evaluate an AST. auto value_or_error = node->Accept(this); + // Convert SP with a nullptr to an error. 
+ if (value_or_error && !*value_or_error) + return llvm::make_error(m_expr, "invalid value object", + node->GetLocation()); // Return the computed value-or-error. The caller is responsible for // checking if an error occured during the evaluation. return value_or_error; @@ -175,21 +274,21 @@ Interpreter::Visit(const IdentifierNode *node) { llvm::Expected Interpreter::Visit(const UnaryOpNode *node) { Status error; - auto rhs_or_err = Evaluate(node->GetOperand()); - if (!rhs_or_err) - return rhs_or_err; + auto op_or_err = Evaluate(node->GetOperand()); + if (!op_or_err) + return op_or_err; - lldb::ValueObjectSP rhs = *rhs_or_err; + lldb::ValueObjectSP operand = *op_or_err; switch (node->GetKind()) { case UnaryOpKind::Deref: { - lldb::ValueObjectSP dynamic_rhs = rhs->GetDynamicValue(m_use_dynamic); - if (dynamic_rhs) - rhs = dynamic_rhs; + lldb::ValueObjectSP dynamic_op = operand->GetDynamicValue(m_use_dynamic); + if (dynamic_op) + operand = dynamic_op; - lldb::ValueObjectSP child_sp = rhs->Dereference(error); + lldb::ValueObjectSP child_sp = operand->Dereference(error); if (!child_sp && m_use_synthetic) { - if (lldb::ValueObjectSP synth_obj_sp = rhs->GetSyntheticValue()) { + if (lldb::ValueObjectSP synth_obj_sp = operand->GetSyntheticValue()) { error.Clear(); child_sp = synth_obj_sp->Dereference(error); } @@ -202,18 +301,69 @@ Interpreter::Visit(const UnaryOpNode *node) { } case UnaryOpKind::AddrOf: { Status error; - lldb::ValueObjectSP value = rhs->AddressOf(error); + lldb::ValueObjectSP value = operand->AddressOf(error); if (error.Fail()) return llvm::make_error(m_expr, error.AsCString(), node->GetLocation()); return value; } + case UnaryOpKind::Minus: { + if (operand->GetCompilerType().IsReferenceType()) { + operand = operand->Dereference(error); + if (error.Fail()) + return error.ToError(); + } + llvm::Expected conv_op = + UnaryConversion(operand, node->GetOperand()->GetLocation()); + if (!conv_op) + return conv_op; + operand = *conv_op; + CompilerType 
operand_type = operand->GetCompilerType(); + if (!operand_type.IsScalarType()) { + std::string errMsg = + llvm::formatv("invalid argument type '{0}' to unary expression", + operand_type.GetTypeName()); + return llvm::make_error(m_expr, errMsg, + node->GetLocation()); + } + Scalar scalar; + bool resolved = operand->ResolveValue(scalar); + if (!resolved) + break; + + bool negated = scalar.UnaryNegate(); + if (negated) + return ValueObject::CreateValueObjectFromScalar( + m_target, scalar, operand->GetCompilerType(), "result"); + break; } - - // Unsupported/invalid operation. - return llvm::make_error( - m_expr, "invalid ast: unexpected binary operator", node->GetLocation()); + case UnaryOpKind::Plus: { + if (operand->GetCompilerType().IsReferenceType()) { + operand = operand->Dereference(error); + if (error.Fail()) + return error.ToError(); + } + llvm::Expected conv_op = + UnaryConversion(operand, node->GetOperand()->GetLocation()); + if (!conv_op) + return conv_op; + operand = *conv_op; + CompilerType operand_type = operand->GetCompilerType(); + if (!operand_type.IsScalarType() && + // Unary plus is allowed for pointers. 
+ !operand_type.IsPointerType()) { + std::string errMsg = + llvm::formatv("invalid argument type '{0}' to unary expression", + operand_type.GetTypeName()); + return llvm::make_error(m_expr, errMsg, + node->GetLocation()); + } + return operand; + } + } + return llvm::make_error(m_expr, "invalid unary operation", + node->GetLocation()); } llvm::Expected @@ -499,24 +649,6 @@ Interpreter::Visit(const BitFieldExtractionNode *node) { return child_valobj_sp; } -static llvm::Expected -GetTypeSystemFromCU(std::shared_ptr ctx) { - SymbolContext symbol_context = - ctx->GetSymbolContext(lldb::eSymbolContextCompUnit); - lldb::LanguageType language = symbol_context.comp_unit->GetLanguage(); - - symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextModule); - return symbol_context.module_sp->GetTypeSystemForLanguage(language); -} - -static CompilerType GetBasicType(lldb::TypeSystemSP type_system, - lldb::BasicType basic_type) { - if (type_system) - return type_system.get()->GetBasicTypeFromAST(basic_type); - - return CompilerType(); -} - llvm::Expected Interpreter::PickIntegerType(lldb::TypeSystemSP type_system, std::shared_ptr ctx, diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index 82b97aafe2261..072ddff1e28d2 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -93,9 +93,12 @@ ASTNodeUP DILParser::ParseExpression() { return ParseUnaryExpression(); } // unary_operator: // "&" // "*" +// "+" +// "-" // ASTNodeUP DILParser::ParseUnaryExpression() { - if (CurToken().IsOneOf({Token::amp, Token::star})) { + if (CurToken().IsOneOf( + {Token::amp, Token::star, Token::minus, Token::plus})) { Token token = CurToken(); uint32_t loc = token.GetLocation(); m_dil_lexer.Advance(); @@ -107,7 +110,12 @@ ASTNodeUP DILParser::ParseUnaryExpression() { case Token::amp: return std::make_unique(loc, UnaryOpKind::AddrOf, std::move(rhs)); - + case Token::minus: + return std::make_unique(loc, 
UnaryOpKind::Minus, + std::move(rhs)); + case Token::plus: + return std::make_unique(loc, UnaryOpKind::Plus, + std::move(rhs)); default: llvm_unreachable("invalid token kind"); } diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py new file mode 100644 index 0000000000000..53a85fed303f4 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py @@ -0,0 +1,46 @@ +""" +Test DIL arithmetic. +""" + +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test import lldbutil + + +class TestFrameVarDILArithmetic(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test_arithmetic(self): + self.build() + lldbutil.run_to_source_breakpoint( + self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") + ) + + self.runCmd("settings set target.experimental.use-DIL true") + + # Check unary results and integral promotion + self.expect_var_path("+0", value="0") + self.expect_var_path("-0", value="0") + self.expect_var_path("+1", value="1") + self.expect_var_path("-1", value="-1") + self.expect_var_path("-9223372036854775808", value="9223372036854775808") + self.expect_var_path("s", value="10", type="short") + self.expect_var_path("+s", value="10", type="int") + self.expect_var_path("-s", value="-10", type="int") + self.expect_var_path("+us", value="1", type="int") + self.expect_var_path("-us", value="-1", type="int") + self.expect_var_path("+ref", value="2", type="int") + self.expect_var_path("-ref", value="-2", type="int") + 
self.expect_var_path("+0.0", value="0") + self.expect_var_path("-0.0", value="-0") + self.expect_var_path("+enum_one", value="1") + self.expect_var_path("-enum_one", value="-1") + self.expect_var_path("+wchar", value="1") + self.expect_var_path("+char16", value="2") + self.expect_var_path("+char32", value="3") + self.expect_var_path("-bitfield.a", value="-1", type="int") + self.expect_var_path("+bitfield.a", value="1", type="int") + self.expect_var_path("+bitfield.b", value="2", type="int") + self.expect_var_path("+bitfield.c", value="3", type="unsigned int") + self.expect_var_path("+bitfield.d", value="4", type="uint64_t") diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp new file mode 100644 index 0000000000000..2c70e93433f5f --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp @@ -0,0 +1,23 @@ +#include + +int main(int argc, char **argv) { + short s = 10; + unsigned short us = 1; + + int x = 2; + int &ref = x; + enum Enum { kZero, kOne } enum_one = kOne; + wchar_t wchar = 1; + char16_t char16 = 2; + char32_t char32 = 3; + + struct BitFieldStruct { + char a : 4; + int b : 32; + unsigned int c : 32; + uint64_t d : 48; + }; + BitFieldStruct bitfield = {1, 2, 3, 4}; + + return 0; // Set a breakpoint here +} diff --git a/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/Makefile b/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/TestFrameVarDILPointerArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/TestFrameVarDILPointerArithmetic.py new file mode 100644 index 0000000000000..88429b370710e --- /dev/null +++ 
b/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/TestFrameVarDILPointerArithmetic.py @@ -0,0 +1,29 @@ +""" +Test DIL pointer arithmetic. +""" + +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test import lldbutil + + +class TestFrameVarDILPointerArithmetic(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test_pointer_arithmetic(self): + self.build() + lldbutil.run_to_source_breakpoint( + self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") + ) + + self.runCmd("settings set target.experimental.use-DIL true") + + self.expect_var_path("+array", type="int *") + self.expect_var_path("+array_ref", type="int *") + self.expect_var_path("+p_int0", type="int *") + self.expect( + "frame var -- '-p_int0'", + error=True, + substrs=["invalid argument type 'int *' to unary expression"], + ) diff --git a/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/main.cpp b/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/main.cpp new file mode 100644 index 0000000000000..b4e0e88b1ffc9 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/expr/PointerArithmetic/main.cpp @@ -0,0 +1,11 @@ +void stop() {} + +int main(int argc, char **argv) { + int array[10]; + array[0] = 0; + int (&array_ref)[10] = array; + int *p_int0 = &array[0]; + + stop(); // Set a breakpoint here + return 0; +} diff --git a/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp b/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp index 033c300ad6926..e28366e9f0432 100644 --- a/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp +++ b/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp @@ -964,3 +964,110 @@ TEST_F(TestArm64InstEmulation, TestPrologueStartsWithStrD8) { EXPECT_TRUE(regloc.IsSame()); } } + +TEST_F(TestArm64InstEmulation, TestMidFunctionEpilogueAndBackwardsJump) { + ArchSpec arch("arm64-apple-ios15"); + std::unique_ptr engine( + static_cast( + 
UnwindAssemblyInstEmulation::CreateInstance(arch))); + ASSERT_NE(nullptr, engine); + + const UnwindPlan::Row *row; + AddressRange sample_range; + UnwindPlan unwind_plan(eRegisterKindLLDB); + UnwindPlan::Row::AbstractRegisterLocation regloc; + + // clang-format off + uint8_t data[] = { + 0xff, 0xc3, 0x00, 0xd1, // <+0>: sub sp, sp, #0x30 + 0xfd, 0x7b, 0x02, 0xa9, // <+4>: stp x29, x30, [sp, #0x20] + 0xfd, 0x83, 0x00, 0x91, // <+8>: add x29, sp, #0x20 + 0x1f, 0x04, 0x00, 0xf1, // <+12>: cmp x0, #0x1 + 0x21, 0x01, 0x00, 0x54, // <+16>: b.ne ; <+52> DO_SOMETHING_AND_GOTO_AFTER_EPILOGUE + 0xfd, 0x7b, 0x42, 0xa9, // <+20>: ldp x29, x30, [sp, #0x20] + 0xff, 0xc3, 0x00, 0x91, // <+24>: add sp, sp, #0x30 + 0xc0, 0x03, 0x5f, 0xd6, // <+28>: ret + // AFTER_EPILOGUE: LLDB computes the next 5 unwind states incorrectly. + 0x37, 0x00, 0x80, 0xd2, // <+32>: mov x23, #0x1 + 0xf6, 0x5f, 0x41, 0xa9, // <+36>: ldp x22, x23, [sp, #0x10] + 0xfd, 0x7b, 0x42, 0xa9, // <+40>: ldp x29, x30, [sp, #0x20] + 0xff, 0xc3, 0x00, 0x91, // <+44>: add sp, sp, #0x30 + 0xc0, 0x03, 0x5f, 0xd6, // <+48>: ret + // DO_SOMETHING_AND_GOTO_AFTER_EPILOGUE + 0xf6, 0x5f, 0x01, 0xa9, // <+52>: stp x22, x23, [sp, #0x10] + 0x36, 0x00, 0x80, 0xd2, // <+56>: mov x22, #0x1 + 0x37, 0x00, 0x80, 0xd2, // <+60>: mov x23, #0x1 + 0xf8, 0xff, 0xff, 0x17, // <+64>: b ; <+32> AFTER_EPILOGUE + }; + + // UnwindPlan we expect: + // row[0]: 0: CFA=sp +0 => + // row[1]: 4: CFA=sp+48 => + // row[2]: 8: CFA=sp+16 => fp=[CFA-16] lr=[CFA-8] + // row[3]: 12: CFA=fp+16 => fp=[CFA-16] lr=[CFA-8] + // row[4]: 24: CFA=sp+48 => fp= lr= + // + // This must come from +56 + // row[5]: 32: CFA=fp+16 => fp=[CFA-16] lr=[CFA-8] x22=[CFA-24], x23=[CFA-32] + // row[6]: 40: CFA=fp+16 => fp=[CFA-16] lr=[CFA-8] x22=same, x23 = same + // row[6]: 44: CFA=sp+48 => fp=same lr=same x22=same, x23 = same + // row[6]: 48: CFA=sp0 => fp=same lr=same x22=same, x23 = same + // + // row[x]: 52: CFA=fp+16 => fp=[CFA-16] lr=[CFA-8] + // row[x]: 56: CFA=fp+16 => 
fp=[CFA-16] lr=[CFA-8] x22=[CFA-24], x23=[CFA-32] + // clang-format on + + sample_range = AddressRange(0x1000, sizeof(data)); + + EXPECT_TRUE(engine->GetNonCallSiteUnwindPlanFromAssembly( + sample_range, data, sizeof(data), unwind_plan)); + + // At the end of prologue (+12), CFA = fp + 16. + // <+0>: sub sp, sp, #0x30 + // <+4>: stp x29, x30, [sp, #0x20] + // <+8>: add x29, sp, #0x20 + row = unwind_plan.GetRowForFunctionOffset(12); + EXPECT_EQ(12, row->GetOffset()); + EXPECT_TRUE(row->GetCFAValue().IsRegisterPlusOffset()); + EXPECT_EQ(row->GetCFAValue().GetRegisterNumber(), gpr_fp_arm64); + EXPECT_EQ(row->GetCFAValue().GetOffset(), 16); + + // +16 and +20 are the same as +12. + // <+12>: cmp x0, #0x1 + // <+16>: b.ne ; <+52> DO_SOMETHING_AND_GOTO_AFTER_EPILOGUE + EXPECT_EQ(12, unwind_plan.GetRowForFunctionOffset(16)->GetOffset()); + EXPECT_EQ(12, unwind_plan.GetRowForFunctionOffset(20)->GetOffset()); + + // After restoring $fp to caller's value, CFA = $sp + 48 + // <+20>: ldp x29, x30, [sp, #0x20] + row = unwind_plan.GetRowForFunctionOffset(24); + EXPECT_EQ(24, row->GetOffset()); + EXPECT_TRUE(row->GetCFAValue().IsRegisterPlusOffset()); + EXPECT_TRUE(row->GetCFAValue().GetRegisterNumber() == gpr_sp_arm64); + EXPECT_EQ(row->GetCFAValue().GetOffset(), 48); + + // $sp has been restored + // <+24>: add sp, sp, #0x30 + row = unwind_plan.GetRowForFunctionOffset(28); + EXPECT_EQ(28, row->GetOffset()); + EXPECT_TRUE(row->GetCFAValue().IsRegisterPlusOffset()); + EXPECT_TRUE(row->GetCFAValue().GetRegisterNumber() == gpr_sp_arm64); + EXPECT_EQ(row->GetCFAValue().GetOffset(), 0); + + // FIXME: Row for offset +32 incorrectly inherits the state of the `ret` + // instruction, but +32 _never_ executes after the `ret`. 
+ // <+28>: ret + // <+32>: mov x23, #0x1 + row = unwind_plan.GetRowForFunctionOffset(32); + // FIXME: EXPECT_NE(32, row->GetOffset()); + + // Check that the state of this branch + // <+16>: b.ne ; <+52> DO_SOMETHING_AND_GOTO_AFTER_EPILOGUE + // was forwarded to the branch target: + // <+52>: stp x22, x23, [sp, #0x10] + row = unwind_plan.GetRowForFunctionOffset(52); + EXPECT_EQ(52, row->GetOffset()); + EXPECT_TRUE(row->GetCFAValue().IsRegisterPlusOffset()); + EXPECT_EQ(row->GetCFAValue().GetRegisterNumber(), gpr_fp_arm64); + EXPECT_EQ(row->GetCFAValue().GetOffset(), 16); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp index 907f8300de6d2..396d64625fb5c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) { if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc)))) return RALSrc; + // RALSrc = G_ANYEXT S16Src + // TruncSrc = G_AMDGPU_READANYLANE RALSrc + // Src = G_TRUNC TruncSrc + if (mi_match(Src, MRI, + m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) { + return RALSrc; + } + // TruncSrc = G_AMDGPU_READANYLANE RALSrc // AextSrc = G_TRUNC TruncSrc // Src = G_ANYEXT AextSrc diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 70564973816b1..e8fda829e2394 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -300,6 +300,12 @@ def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllZerosV))]>; def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllOnesV))]>; +let AddedComplexity = 1, Predicates = [HasVLX] in { + def AVX512_128_SETALLONES : I<0, Pseudo, (outs VR128X:$dst), (ins), + "", [(set VR128X:$dst, (v4i32 immAllOnesV))]>; + def AVX512_256_SETALLONES : I<0, 
Pseudo, (outs VR256X:$dst), (ins), + "", [(set VR256X:$dst, (v8i32 immAllOnesV))]>; +} } let Predicates = [HasAVX512] in { diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index cb0208a4a5f32..b988ae0aca912 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -778,6 +778,8 @@ bool X86InstrInfo::isReMaterializableImpl( case X86::AVX512_128_SET0: case X86::AVX512_256_SET0: case X86::AVX512_512_SET0: + case X86::AVX512_128_SETALLONES: + case X86::AVX512_256_SETALLONES: case X86::AVX512_512_SETALLONES: case X86::AVX512_FsFLD0SD: case X86::AVX512_FsFLD0SH: @@ -6246,9 +6248,31 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf); return true; } + case X86::AVX512_128_SETALLONES: + case X86::AVX512_256_SETALLONES: case X86::AVX512_512_SETALLONES: { Register Reg = MIB.getReg(0); - MIB->setDesc(get(X86::VPTERNLOGDZrri)); + unsigned Opc; + switch (MI.getOpcode()) { + case X86::AVX512_128_SETALLONES: { + if (X86::VR128RegClass.contains(Reg)) + return Expand2AddrUndef(MIB, get(X86::VPCMPEQDrr)); + + Opc = X86::VPTERNLOGDZ128rri; + break; + } + case X86::AVX512_256_SETALLONES: { + if (X86::VR256RegClass.contains(Reg)) + return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); + + Opc = X86::VPTERNLOGDZ256rri; + break; + } + case X86::AVX512_512_SETALLONES: + Opc = X86::VPTERNLOGDZrri; + break; + } + MIB->setDesc(get(Opc)); // VPTERNLOGD needs 3 register inputs and an immediate. // 0xff will return 1s for any input. 
MIB.addReg(Reg, RegState::Undef) @@ -8190,6 +8214,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::AVX1_SETALLONES: case X86::AVX_SET0: case X86::AVX512_256_SET0: + case X86::AVX512_256_SETALLONES: Alignment = Align(32); break; case X86::V_SET0: @@ -8197,6 +8222,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::AVX512_128_SET0: case X86::FsFLD0F128: case X86::AVX512_FsFLD0F128: + case X86::AVX512_128_SETALLONES: Alignment = Align(16); break; case X86::MMX_SET0: @@ -8255,6 +8281,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::AVX512_128_SET0: case X86::AVX512_256_SET0: case X86::AVX512_512_SET0: + case X86::AVX512_128_SETALLONES: + case X86::AVX512_256_SETALLONES: case X86::AVX512_512_SETALLONES: case X86::FsFLD0SH: case X86::AVX512_FsFLD0SH: @@ -8315,6 +8343,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( break; case X86::AVX1_SETALLONES: case X86::AVX2_SETALLONES: + case X86::AVX512_256_SETALLONES: IsAllOnes = true; [[fallthrough]]; case X86::AVX512_256_SET0: @@ -8328,6 +8357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( 2); break; case X86::V_SETALLONES: + case X86::AVX512_128_SETALLONES: IsAllOnes = true; [[fallthrough]]; case X86::V_SET0: diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 8883a527c2226..4ac6cb247bd13 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -665,7 +665,10 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S, return; SmallVector BrInsts; + SmallPtrSet Visited; auto Pred = [&](const Instruction *I) { + if (!Visited.insert(I).second) + return false; if (const BranchInst *Br = dyn_cast(I)) if (Br->isConditional()) BrInsts.push_back(Br); @@ -684,28 +687,10 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S, // ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... 
/\ ChildS_{m, n_m} // // Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m - // - // FIXME: Currently, recursive branches are not handled. For example, we - // can't deduce that ptr must be dereferenced in below function. - // - // void f(int a, int c, int *ptr) { - // if(a) - // if (b) { - // *ptr = 0; - // } else { - // *ptr = 1; - // } - // else { - // if (b) { - // *ptr = 0; - // } else { - // *ptr = 1; - // } - // } - // } Explorer->checkForAllContext(&CtxI, Pred); - for (const BranchInst *Br : BrInsts) { + while (!BrInsts.empty()) { + const BranchInst *Br = BrInsts.pop_back_val(); StateType ParentState; // The known state of the parent state is a conjunction of children's @@ -714,15 +699,18 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S, for (const BasicBlock *BB : Br->successors()) { StateType ChildState; - size_t BeforeSize = Uses.size(); - followUsesInContext(AA, A, *Explorer, &BB->front(), Uses, ChildState); + const Instruction *I = &BB->front(); + followUsesInContext(AA, A, *Explorer, I, Uses, ChildState); // Erase uses which only appear in the child. for (auto It = Uses.begin() + BeforeSize; It != Uses.end();) It = Uses.erase(It); ParentState &= ChildState; + + // Check for recursive conditional branches. + Explorer->checkForAllContext(I, Pred); } // Use only known state. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 405f83a6ce8e5..8a435accfedfe 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1096,6 +1096,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, // Calculates the first active lane index of the vector predicate operands. // It produces the lane index across all unrolled iterations. Unrolling will // add all copies of its original operand as additional operands. + // Implemented with @llvm.experimental.cttz.elts, but returns the expected + // result even with operands that are all zeroes. 
FirstActiveLane, // The opcodes below are used for VPInstructionWithType. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b27f2f8a3c8cb..5ea9dd349e06f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1015,7 +1015,7 @@ Value *VPInstruction::generate(VPTransformState &State) { if (getNumOperands() == 1) { Value *Mask = State.get(getOperand(0)); return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask, - true, Name); + /*ZeroIsPoison=*/false, Name); } // If there are multiple operands, create a chain of selects to pick the // first operand with an active lane and add the number of lanes of the @@ -1031,9 +1031,9 @@ Value *VPInstruction::generate(VPTransformState &State) { Builder.CreateICmpEQ(State.get(getOperand(Idx)), Builder.getFalse()), Builder.getInt64Ty()) - : Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), - State.get(getOperand(Idx)), - true, Name); + : Builder.CreateCountTrailingZeroElems( + Builder.getInt64Ty(), State.get(getOperand(Idx)), + /*ZeroIsPoison=*/false, Name); Value *Current = Builder.CreateAdd( Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros); if (Res) { diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll index 9d06b4bdec9b4..cd6068d382169 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll @@ -5,277 +5,544 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define i8 @fshl_i8_3rd_arg_const(i8 %a, i8 %b) { ; CHECK-LABEL: 'fshl_i8_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 
9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9) - ret i8 %fshl + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9) + ret i8 %r } define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: 'fshl_i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) - ret i8 %fshl + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) + ret i8 %r } -declare i8 @llvm.fshl.i8(i8, i8, i8) - -define i16 @fshl_i16(i16 %a, i16 %b) { -; CHECK-LABEL: 'fshl_i16' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %fshl +define i16 @fshl_i16_3rd_arg_const(i16 %a, i16 %b) { +; CHECK-LABEL: 'fshl_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r ; entry: - %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) - ret i16 %fshl + %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) + ret i16 %r } -declare i16 @llvm.fshl.i16(i16, i16, i16) +define i16 @fshl_i16_3rd_arg_var(i16 %a, i16 %b, i16 %c) { +; CHECK-LABEL: 'fshl_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: 
+ %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %c) + ret i16 %r +} define i32 @fshl_i32_3rd_arg_const(i32 %a, i32 %b) { ; CHECK-LABEL: 'fshl_i32_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshl +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) - ret i32 %fshl + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) + ret i32 %r } define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: 'fshl_i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) - ret i32 %fshl + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + ret i32 %r } -declare i32 @llvm.fshl.i32(i32, i32, i32) - define i64 @fshl_i64_3rd_arg_const(i64 %a, i64 %b) { ; CHECK-LABEL: 'fshl_i64_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshl +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshl = tail call i64 @llvm.fshl.i64(i64 %a, 
i64 %b, i64 9) - ret i64 %fshl + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) + ret i64 %r } define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: 'fshl_i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) + ret i64 %r +} + +define i128 @fshl_i128_3rd_arg_const(i128 %a, i128 %b) { +; CHECK-LABEL: 'fshl_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) - ret i64 %fshl + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) + ret i128 %r } -declare i64 @llvm.fshl.i64(i64, i64, i64) +define i128 @fshl_i128_3rd_arg_var(i128 %a, i128 %b, i128 %c) { +; CHECK-LABEL: 'fshl_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:9 SizeLat:9 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 %c) + ret i128 %r +} define i19 @fshl_i19(i19 %a, i19 %b) { ; CHECK-LABEL: 'fshl_i19' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 
for: %r = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %r ; entry: - %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) - ret i19 %fshl + %r = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) + ret i19 %r } -declare i19 @llvm.fshl.i19(i19, i19, i19) +define i66 @fshl_i66(i66 %a, i66 %b) { +; CHECK-LABEL: 'fshl_i66' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %r +; +entry: + %r = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) + ret i66 %r +} define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - ret <16 x i8> %fshl + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + ret <16 x i8> %r } define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl +; CHECK-NEXT: Cost Model: 
Found costs of 6 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - ret <16 x i8> %fshl + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + ret <16 x i8> %r } define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %fshl + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %r } -declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshl = tail call <8 x 
i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) - ret <8 x i16> %fshl + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) + ret <8 x i16> %r } define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) - ret <8 x i16> %fshl + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) + ret <8 x i16> %r } define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) - ret <8 x i16> %fshl + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) + ret <8 x i16> %r } -declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) - define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> 
%a, <4 x i32> %b) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) - ret <4 x i32> %fshl + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) + ret <4 x i32> %r } define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) - ret <4 x i32> %fshl + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) + ret <4 x i32> %r } define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> 
%fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) - ret <4 x i32> %fshl + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %r } -declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) - define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) - ret <2 x i64> %fshl + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) + ret <2 x i64> %r } define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 
SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) - ret <2 x i64> %fshl + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) + ret <2 x i64> %r } define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) - ret <2 x i64> %fshl + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %r } -declare <2 x i64> @llvm.fshl.v4i64(<2 x i64>, <2 x i64>, <2 x i64>) - define <4 x i30> @fshl_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) { ; CHECK-LABEL: 'fshl_v4i30_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshl +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %r = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %r ; entry: - %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) - ret <4 x i30> %fshl + %r = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x 
i30> %b, <4 x i30> %c) + ret <4 x i30> %r } -declare <4 x i30> @llvm.fshl.v4i30(<4 x i30>, <4 x i30>, <4 x i30>) - define <2 x i66> @fshl_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) { ; CHECK-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshl +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %r ; entry: - %fshl = tail call <2 x i66> @llvm.fshl.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) - ret <2 x i66> %fshl + %r = tail call <2 x i66> @llvm.fshl.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) + ret <2 x i66> %r } -declare <2 x i66> @llvm.fshl.v4i66(<2 x i66>, <2 x i66>, <2 x i66>) -define i66 @fshl_i66(i66 %a, i66 %b) { -; CHECK-LABEL: 'fshl_i66' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %fshl +define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a, <2 x i128> %b) { +; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r ; entry: - %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) - ret i66 %fshl + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) + 
ret <2 x i128> %r } -declare i66 @llvm.fshl.i66(i66, i66, i66) - define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) { ; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshl +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) + ret <2 x i128> %r +} + +define <2 x i128> @fshl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) { +; CHECK-LABEL: 'fshl_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:17 Lat:21 SizeLat:21 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) + ret <2 x i128> %r +} + + +; Rotate tests + +define i8 @rotl_i8_3rd_arg_const(i8 %a) { +; CHECK-LABEL: 'rotl_i8_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 9) + ret i8 %r +} + +define i8 @rotl_i8_3rd_arg_var(i8 %a, i8 %c) { +; CHECK-LABEL: 'rotl_i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 
%c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 %c) + ret i8 %r +} + +define i16 @rotl_i16_3rd_arg_const(i16 %a) { +; CHECK-LABEL: 'rotl_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 9) + ret i16 %r +} + +define i16 @rotl_i16_3rd_arg_var(i16 %a, i16 %c) { +; CHECK-LABEL: 'rotl_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 %c) + ret i16 %r +} + +define i32 @rotl_i32_3rd_arg_const(i32 %a) { +; CHECK-LABEL: 'rotl_i32_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) + ret i32 %r +} + +define i32 @rotl_i32_3rd_arg_var(i32 %a, i32 %c) { +; CHECK-LABEL: 'rotl_i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) + ret i32 %r +} + +define i64 @rotl_i64_3rd_arg_const(i64 %a) { +; CHECK-LABEL: 'rotl_i64_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail 
call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) + ret i64 %r +} + +define i64 @rotl_i64_3rd_arg_var(i64 %a, i64 %c) { +; CHECK-LABEL: 'rotl_i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshl = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) - ret <2 x i128> %fshl + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) + ret i64 %r } -declare <2 x i128> @llvm.fshl.v4i128(<2 x i128>, <2 x i128>, <2 x i128>) -define i128 @fshl_i128(i128 %a, i128 %b) { -; CHECK-LABEL: 'fshl_i128' -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshl +define i128 @rotl_i128_3rd_arg_const(i128 %a) { +; CHECK-LABEL: 'rotl_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) - ret i128 %fshl + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 9) + ret i128 %r } -declare i128 @llvm.fshl.i128(i128, i128, i128) +define i128 @rotl_i128_3rd_arg_var(i128 %a, i128 %c) { +; CHECK-LABEL: 'rotl_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 %c) + ret i128 %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> 
%a) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> ) + ret <16 x i8> %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> ) + ret <16 x i8> %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %c) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) + ret <16 x i8> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> ) + ret <8 x i16> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a) { +; CHECK-LABEL: 
'rotl_v8i16_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> ) + ret <8 x i16> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %c) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) + ret <8 x i16> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> ) + ret <4 x i32> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> ) + ret <4 x i32> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %c) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: 
Found costs of 5 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) + ret <4 x i32> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> ) + ret <2 x i64> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> ) + ret <2 x i64> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %c) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) + ret <2 x i64> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 
SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) + ret <2 x i128> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) + ret <2 x i128> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %c) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) + ret <2 x i128> %r +} diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll index b31806b647868..795371e9f3f68 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll @@ -5,277 +5,544 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define i8 @fshr_i8_3rd_arg_const(i8 %a, i8 %b) { ; CHECK-LABEL: 'fshr_i8_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 
SizeLat:1 for: ret i8 %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) - ret i8 %fshr + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) + ret i8 %r } define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: 'fshr_i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) - ret i8 %fshr + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) + ret i8 %r } -declare i8 @llvm.fshr.i8(i8, i8, i8) - -define i16 @fshr_i16(i16 %a, i16 %b) { -; CHECK-LABEL: 'fshr_i16' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %fshr +define i16 @fshr_i16_3rd_arg_const(i16 %a, i16 %b) { +; CHECK-LABEL: 'fshr_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r ; entry: - %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) - ret i16 %fshr + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) + ret i16 %r } -declare i16 @llvm.fshr.i16(i16, i16, i16) +define i16 @fshr_i16_3rd_arg_var(i16 %a, i16 %b, i16 %c) { +; CHECK-LABEL: 'fshr_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i16 
@llvm.fshr.i16(i16 %a, i16 %b, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %c) + ret i16 %r +} define i32 @fshr_i32_3rd_arg_const(i32 %a, i32 %b) { ; CHECK-LABEL: 'fshr_i32_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshr +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) - ret i32 %fshr + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) + ret i32 %r } define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: 'fshr_i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) - ret i32 %fshr + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) + ret i32 %r } -declare i32 @llvm.fshr.i32(i32, i32, i32) - define i64 @fshr_i64_3rd_arg_const(i64 %a, i64 %b) { ; CHECK-LABEL: 'fshr_i64_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshr +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) +; 
CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) - ret i64 %fshr + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) + ret i64 %r } define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: 'fshr_i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) + ret i64 %r +} + +define i128 @fshr_i128_3rd_arg_const(i128 %a, i128 %b) { +; CHECK-LABEL: 'fshr_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) - ret i64 %fshr + %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) + ret i128 %r } -declare i64 @llvm.fshr.i64(i64, i64, i64) +define i128 @fshr_i128_3rd_arg_var(i128 %a, i128 %b, i128 %c) { +; CHECK-LABEL: 'fshr_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:9 SizeLat:9 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 %c) + ret i128 %r +} define i19 @fshr_i19(i19 %a, i19 %b) { ; CHECK-LABEL: 'fshr_i19' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, 
i19 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %r ; entry: - %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) - ret i19 %fshr + %r = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) + ret i19 %r } -declare i19 @llvm.fshr.i19(i19, i19, i19) +define i66 @fshr_i66(i66 %a, i66 %b) { +; CHECK-LABEL: 'fshr_i66' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %r +; +entry: + %r = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) + ret i66 %r +} define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - ret <16 x i8> %fshr + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + ret <16 x i8> %r } define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> 
%b, <16 x i8> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - ret <16 x i8> %fshr + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + ret <16 x i8> %r } define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %fshr + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %r } -declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> 
splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) - ret <8 x i16> %fshr + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) + ret <8 x i16> %r } define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) - ret <8 x i16> %fshr + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) + ret <8 x i16> %r } define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) - ret <8 x i16> %fshr + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) + ret <8 x i16> 
%r } -declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) - define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) - ret <4 x i32> %fshr + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) + ret <4 x i32> %r } define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) - ret <4 x i32> %fshr + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) + ret <4 x i32> %r } define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <4 x i32> 
@llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) - ret <4 x i32> %fshr + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %r } -declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) - define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) - ret <2 x i64> %fshr + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) + ret <2 x i64> %r } define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: 
%r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) - ret <2 x i64> %fshr + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) + ret <2 x i64> %r } define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) - ret <2 x i64> %fshr + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %r } -declare <2 x i64> @llvm.fshr.v4i64(<2 x i64>, <2 x i64>, <2 x i64>) - define <4 x i30> @fshr_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) { ; CHECK-LABEL: 'fshr_v4i30_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshr +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %r = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %r ; entry: - %fshr = tail call <4 x 
i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) - ret <4 x i30> %fshr + %r = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) + ret <4 x i30> %r } -declare <4 x i30> @llvm.fshr.v4i30(<4 x i30>, <4 x i30>, <4 x i30>) - define <2 x i66> @fshr_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) { ; CHECK-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshr +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %r ; entry: - %fshr = tail call <2 x i66> @llvm.fshr.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) - ret <2 x i66> %fshr + %r = tail call <2 x i66> @llvm.fshr.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) + ret <2 x i66> %r } -declare <2 x i66> @llvm.fshr.v4i66(<2 x i66>, <2 x i66>, <2 x i66>) -define i66 @fshr_i66(i66 %a, i66 %b) { -; CHECK-LABEL: 'fshr_i66' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %fshr +define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a, <2 x i128> %b) { +; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r ; entry: - %fshr = tail call 
i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) - ret i66 %fshr + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) + ret <2 x i128> %r } -declare i66 @llvm.fshr.i66(i66, i66, i66) - define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) { ; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshr +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) + ret <2 x i128> %r +} + +define <2 x i128> @fshr_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) { +; CHECK-LABEL: 'fshr_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:17 Lat:21 SizeLat:21 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) + ret <2 x i128> %r +} + + +; Rotate tests + +define i8 @rotl_i8_3rd_arg_const(i8 %a) { +; CHECK-LABEL: 'rotl_i8_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 9) + ret i8 %r +} + +define i8 @rotl_i8_3rd_arg_var(i8 
%a, i8 %c) { +; CHECK-LABEL: 'rotl_i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 %c) + ret i8 %r +} + +define i16 @rotl_i16_3rd_arg_const(i16 %a) { +; CHECK-LABEL: 'rotl_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 9) + ret i16 %r +} + +define i16 @rotl_i16_3rd_arg_var(i16 %a, i16 %c) { +; CHECK-LABEL: 'rotl_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 %c) + ret i16 %r +} + +define i32 @rotl_i32_3rd_arg_const(i32 %a) { +; CHECK-LABEL: 'rotl_i32_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) + ret i32 %r +} + +define i32 @rotl_i32_3rd_arg_var(i32 %a, i32 %c) { +; CHECK-LABEL: 'rotl_i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) + ret i32 %r +} + +define i64 @rotl_i64_3rd_arg_const(i64 %a) { +; CHECK-LABEL: 'rotl_i64_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 
@llvm.fshr.i64(i64 %a, i64 %a, i64 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) + ret i64 %r +} + +define i64 @rotl_i64_3rd_arg_var(i64 %a, i64 %c) { +; CHECK-LABEL: 'rotl_i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshr = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) - ret <2 x i128> %fshr + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) + ret i64 %r } -declare <2 x i128> @llvm.fshr.v4i128(<2 x i128>, <2 x i128>, <2 x i128>) -define i128 @fshr_i128(i128 %a, i128 %b) { -; CHECK-LABEL: 'fshr_i128' -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshr +define i128 @rotl_i128_3rd_arg_const(i128 %a) { +; CHECK-LABEL: 'rotl_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) - ret i128 %fshr + %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 9) + ret i128 %r } -declare i128 @llvm.fshr.i128(i128, i128, i128) +define i128 @rotl_i128_3rd_arg_var(i128 %a, i128 %c) { +; CHECK-LABEL: 'rotl_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + 
%r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %c) + ret i128 %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> ) + ret <16 x i8> %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> ) + ret <16 x i8> %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %c) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) + ret <16 x i8> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x 
i16> %a, <8 x i16> ) + ret <8 x i16> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> ) + ret <8 x i16> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %c) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) + ret <8 x i16> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> ) + ret <4 x i32> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> ) + ret <4 x i32> %r +} + 
+define <4 x i32> @rotl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %c) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) + ret <4 x i32> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> ) + ret <2 x i64> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> ) + ret <2 x i64> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %c) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) + ret <2 x i64> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x 
i128> %a) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) + ret <2 x i128> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> ) + ret <2 x i128> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %c) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) + ret <2 x i128> %r +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll index 4361e5c113708..27005e7aa175e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll @@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a ; GFX11-True16-NEXT: v_mov_b32_e32 v1, s0 ; 
GFX11-True16-NEXT: ds_load_u16_d16 v1, v1 ; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-True16-NEXT: v_readfirstlane_b32 s0, v1 -; GFX11-True16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-True16-NEXT: v_mov_b16_e32 v1.l, s0 ; GFX11-True16-NEXT: ds_store_b16 v0, v1 ; GFX11-True16-NEXT: s_endpgm ; @@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a ; GFX12-True16-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-True16-NEXT: ds_load_u16_d16 v1, v1 ; GFX12-True16-NEXT: s_wait_dscnt 0x0 -; GFX12-True16-NEXT: v_readfirstlane_b32 s0, v1 -; GFX12-True16-NEXT: s_wait_alu 0xf1ff -; GFX12-True16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-True16-NEXT: v_mov_b16_e32 v1.l, s0 ; GFX12-True16-NEXT: ds_store_b16 v0, v1 ; GFX12-True16-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll index bf36deac33380..9bf140cf744db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll @@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mov_b16_e32 v2.l, s0 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_endpgm ; @@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mov_b16_e32 v2.l, s0 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/SPIRV/semantics/position.ps.ll 
b/llvm/test/CodeGen/SPIRV/semantics/position.ps.ll new file mode 100644 index 0000000000000..2c02987f73928 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/semantics/position.ps.ll @@ -0,0 +1,32 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpDecorate %[[#INPUT:]] BuiltIn FragCoord +; CHECK-DAG: OpDecorate %[[#OUTPUT:]] Location 0 + +; CHECK-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#v4:]] = OpTypeVector %[[#float]] 4 +; CHECK-DAG: %[[#ptr_i:]] = OpTypePointer Input %[[#v4]] +; CHECK-DAG: %[[#ptr_o:]] = OpTypePointer Output %[[#v4]] + +; CHECK-DAG: %[[#INPUT]] = OpVariable %[[#ptr_i]] Input +; CHECK-DAG: %[[#OUTPUT]] = OpVariable %[[#ptr_o]] Output + +@SV_Position = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 +@A0 = external hidden thread_local addrspace(8) global <4 x float>, !spirv.Decorations !2 + +define void @main() #1 { +entry: + %0 = load <4 x float>, ptr addrspace(7) @SV_Position, align 16 + store <4 x float> %0, ptr addrspace(8) @A0, align 16 + ret void + +; CHECK: %[[#TMP:]] = OpLoad %[[#v4]] %[[#INPUT]] Aligned 16 +; CHECK: OpStore %[[#OUTPUT]] %[[#TMP]] Aligned 16 +} + +!0 = !{!1} +!1 = !{i32 11, i32 15} +!2 = !{!3} +!3 = !{i32 30, i32 0} + diff --git a/llvm/test/CodeGen/SPIRV/semantics/position.vs.ll b/llvm/test/CodeGen/SPIRV/semantics/position.vs.ll new file mode 100644 index 0000000000000..73165f3719a97 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/semantics/position.vs.ll @@ -0,0 +1,31 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpDecorate %[[#INPUT:]] Location 0 +; CHECK-DAG: OpDecorate %[[#OUTPUT:]] BuiltIn Position + +; CHECK-DAG: %[[#float:]] = 
OpTypeFloat 32 +; CHECK-DAG: %[[#v4:]] = OpTypeVector %[[#float]] 4 +; CHECK-DAG: %[[#ptr_i:]] = OpTypePointer Input %[[#v4]] +; CHECK-DAG: %[[#ptr_o:]] = OpTypePointer Output %[[#v4]] + +; CHECK-DAG: %[[#INPUT]] = OpVariable %[[#ptr_i]] Input +; CHECK-DAG: %[[#OUTPUT]] = OpVariable %[[#ptr_o]] Output + +@SV_Position0 = external hidden thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 +@SV_Position = external hidden thread_local addrspace(8) global <4 x float>, !spirv.Decorations !2 + +define void @main() #1 { +entry: + %0 = load <4 x float>, ptr addrspace(7) @SV_Position0, align 16 + store <4 x float> %0, ptr addrspace(8) @SV_Position, align 16 + ret void + +; CHECK: %[[#TMP:]] = OpLoad %[[#v4]] %[[#INPUT]] Aligned 16 +; CHECK: OpStore %[[#OUTPUT]] %[[#TMP]] Aligned 16 +} + +!0 = !{!1} +!1 = !{i32 30, i32 0} +!2 = !{!3} +!3 = !{i32 11, i32 0} diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-general.ll b/llvm/test/CodeGen/X86/apx/no-rex2-general.ll index 2b34739fa80e3..6f31aef9aee98 100644 --- a/llvm/test/CodeGen/X86/apx/no-rex2-general.ll +++ b/llvm/test/CodeGen/X86/apx/no-rex2-general.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3,+egpr | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3,+egpr,+avx | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3,+egpr --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3,+egpr,+avx --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX define i32 @map0(ptr nocapture noundef readonly %a, i64 noundef %b) { ; CHECK-LABEL: map0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %r16 -; CHECK-NEXT: movq %rdi, %r17 +; CHECK-NEXT: movq %rsi, %r16 # encoding: [0xd5,0x18,0x89,0xf0] +; 
CHECK-NEXT: movq %rdi, %r17 # encoding: [0xd5,0x18,0x89,0xf9] ; CHECK-NEXT: #APP -; CHECK-NEXT: nop +; CHECK-NEXT: nop # encoding: [0x90] ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl (%r17,%r16,4), %eax -; CHECK-NEXT: retq +; CHECK-NEXT: movl (%r17,%r16,4), %eax # encoding: [0xd5,0x30,0x8b,0x04,0x81] +; CHECK-NEXT: retq # encoding: [0xc3] entry: %add.ptr = getelementptr inbounds i32, ptr %a, i64 %b tail call void asm sideeffect "nop", "~{eax},~{ecx},~{edx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() @@ -22,23 +22,23 @@ entry: define i32 @map1_or_vex(<2 x double> noundef %a) nounwind { ; SSE-LABEL: map1_or_vex: ; SSE: # %bb.0: # %entry -; SSE-NEXT: cvtsd2si %xmm0, %r16d +; SSE-NEXT: cvtsd2si %xmm0, %r16d # encoding: [0xf2,0xd5,0xc0,0x2d,0xc0] ; SSE-NEXT: #APP -; SSE-NEXT: nop +; SSE-NEXT: nop # encoding: [0x90] ; SSE-NEXT: #NO_APP -; SSE-NEXT: movl %r16d, %eax -; SSE-NEXT: retq +; SSE-NEXT: movl %r16d, %eax # encoding: [0xd5,0x40,0x89,0xc0] +; SSE-NEXT: retq # encoding: [0xc3] ; ; AVX-LABEL: map1_or_vex: ; AVX: # %bb.0: # %entry -; AVX-NEXT: pushq %rbx -; AVX-NEXT: vcvtsd2si %xmm0, %ebx +; AVX-NEXT: pushq %rbx # encoding: [0x53] +; AVX-NEXT: vcvtsd2si %xmm0, %ebx # encoding: [0xc5,0xfb,0x2d,0xd8] ; AVX-NEXT: #APP -; AVX-NEXT: nop +; AVX-NEXT: nop # encoding: [0x90] ; AVX-NEXT: #NO_APP -; AVX-NEXT: movl %ebx, %eax -; AVX-NEXT: popq %rbx -; AVX-NEXT: retq +; AVX-NEXT: movl %ebx, %eax # encoding: [0x89,0xd8] +; AVX-NEXT: popq %rbx # encoding: [0x5b] +; AVX-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a) tail call void asm sideeffect "nop", "~{eax},~{ecx},~{edx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() @@ -48,31 +48,31 @@ entry: define <2 x i64> @map2_or_vex(ptr nocapture noundef readonly %b, i64 noundef %c) nounwind { ; SSE-LABEL: map2_or_vex: ; SSE: # %bb.0: # %entry -; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %rbx -; SSE-NEXT: movq %rsi, %rbx -; SSE-NEXT: movq %rdi, %r14 +; SSE-NEXT: pushq %r14 # encoding: [0x41,0x56] +; 
SSE-NEXT: pushq %rbx # encoding: [0x53] +; SSE-NEXT: movq %rsi, %rbx # encoding: [0x48,0x89,0xf3] +; SSE-NEXT: movq %rdi, %r14 # encoding: [0x49,0x89,0xfe] ; SSE-NEXT: #APP -; SSE-NEXT: nop +; SSE-NEXT: nop # encoding: [0x90] ; SSE-NEXT: #NO_APP -; SSE-NEXT: pabsb (%r14,%rbx,4), %xmm0 -; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r14 -; SSE-NEXT: retq +; SSE-NEXT: pabsb (%r14,%rbx,4), %xmm0 # encoding: [0x66,0x41,0x0f,0x38,0x1c,0x04,0x9e] +; SSE-NEXT: popq %rbx # encoding: [0x5b] +; SSE-NEXT: popq %r14 # encoding: [0x41,0x5e] +; SSE-NEXT: retq # encoding: [0xc3] ; ; AVX-LABEL: map2_or_vex: ; AVX: # %bb.0: # %entry -; AVX-NEXT: pushq %r14 -; AVX-NEXT: pushq %rbx -; AVX-NEXT: movq %rsi, %rbx -; AVX-NEXT: movq %rdi, %r14 +; AVX-NEXT: pushq %r14 # encoding: [0x41,0x56] +; AVX-NEXT: pushq %rbx # encoding: [0x53] +; AVX-NEXT: movq %rsi, %rbx # encoding: [0x48,0x89,0xf3] +; AVX-NEXT: movq %rdi, %r14 # encoding: [0x49,0x89,0xfe] ; AVX-NEXT: #APP -; AVX-NEXT: nop +; AVX-NEXT: nop # encoding: [0x90] ; AVX-NEXT: #NO_APP -; AVX-NEXT: vpabsb (%r14,%rbx,4), %xmm0 -; AVX-NEXT: popq %rbx -; AVX-NEXT: popq %r14 -; AVX-NEXT: retq +; AVX-NEXT: vpabsb (%r14,%rbx,4), %xmm0 # encoding: [0xc4,0xc2,0x79,0x1c,0x04,0x9e] +; AVX-NEXT: popq %rbx # encoding: [0x5b] +; AVX-NEXT: popq %r14 # encoding: [0x41,0x5e] +; AVX-NEXT: retq # encoding: [0xc3] entry: tail call void asm sideeffect "nop", "~{eax},~{ecx},~{edx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() %add.ptr = getelementptr inbounds i32, ptr %b, i64 %c diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll index c193680607f76..a6ab98f8bf03e 100644 --- a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll +++ b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll @@ -1,18 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+amx-tile,+egpr | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown 
-mattr=+amx-tile,+egpr --show-mc-encoding | FileCheck %s define dso_local void @amx(ptr noundef %data) nounwind { ; CHECK-LABEL: amx: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: pushq %rbx # encoding: [0x53] +; CHECK-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb] ; CHECK-NEXT: #APP -; CHECK-NEXT: nop +; CHECK-NEXT: nop # encoding: [0x90] ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl $8, %eax -; CHECK-NEXT: tileloadd (%rbx,%rax), %tmm4 -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq +; CHECK-NEXT: movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00] +; CHECK-NEXT: tileloadd (%rbx,%rax), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x24,0x03] +; CHECK-NEXT: popq %rbx # encoding: [0x5b] +; CHECK-NEXT: retq # encoding: [0xc3] entry: tail call void asm sideeffect "nop", "~{eax},~{ecx},~{edx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() call void @llvm.x86.tileloadd64(i8 4, ptr %data, i64 8) diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll index 4692a58d095a6..e7bc0c362cad3 100644 --- a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll +++ b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=-sse,+egpr | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=-sse,+egpr --show-mc-encoding | FileCheck %s define void @x87(ptr %0, ptr %1) nounwind { ; CHECK-LABEL: x87: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: pushq %r14 # encoding: [0x41,0x56] +; CHECK-NEXT: pushq %rbx # encoding: [0x53] +; CHECK-NEXT: movq %rsi, %rbx # encoding: [0x48,0x89,0xf3] +; CHECK-NEXT: movq %rdi, %r14 # encoding: [0x49,0x89,0xfe] ; CHECK-NEXT: #APP -; CHECK-NEXT: nop +; CHECK-NEXT: nop # encoding: [0x90] 
; CHECK-NEXT: #NO_APP -; CHECK-NEXT: flds (%r14) -; CHECK-NEXT: fstps (%rbx) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: retq +; CHECK-NEXT: flds (%r14) # encoding: [0x41,0xd9,0x06] +; CHECK-NEXT: fstps (%rbx) # encoding: [0xd9,0x1b] +; CHECK-NEXT: popq %rbx # encoding: [0x5b] +; CHECK-NEXT: popq %r14 # encoding: [0x41,0x5e] +; CHECK-NEXT: retq # encoding: [0xc3] tail call void asm sideeffect "nop", "~{eax},~{ecx},~{edx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() %3 = load float, ptr %0 store float %3, ptr %1 diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-special.ll b/llvm/test/CodeGen/X86/apx/no-rex2-special.ll index f2025b5c8cbf8..9b89bce283b15 100644 --- a/llvm/test/CodeGen/X86/apx/no-rex2-special.ll +++ b/llvm/test/CodeGen/X86/apx/no-rex2-special.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+xsave,+egpr | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+xsave,+egpr --show-mc-encoding | FileCheck %s define void @test_xsave(ptr %ptr, i32 %hi, i32 %lo) nounwind { ; CHECK-LABEL: test_xsave: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movl %edx, %r16d -; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: pushq %rbx # encoding: [0x53] +; CHECK-NEXT: movl %edx, %r16d # encoding: [0xd5,0x10,0x89,0xd0] +; CHECK-NEXT: movl %esi, %edx # encoding: [0x89,0xf2] +; CHECK-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb] ; CHECK-NEXT: #APP -; CHECK-NEXT: nop +; CHECK-NEXT: nop # encoding: [0x90] ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl %r16d, %eax -; CHECK-NEXT: xsave (%rbx) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq +; CHECK-NEXT: movl %r16d, %eax # encoding: [0xd5,0x40,0x89,0xc0] +; CHECK-NEXT: xsave (%rbx) # encoding: [0x0f,0xae,0x23] +; CHECK-NEXT: popq %rbx # encoding: [0x5b] +; CHECK-NEXT: retq # encoding: [0xc3] tail call void asm sideeffect "nop", 
"~{eax},~{ecx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() call void @llvm.x86.xsave(ptr %ptr, i32 %hi, i32 %lo) ret void; @@ -24,17 +24,17 @@ declare void @llvm.x86.xsave(ptr, i32, i32) define void @test_xsave64(ptr %ptr, i32 %hi, i32 %lo) nounwind { ; CHECK-LABEL: test_xsave64: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movl %edx, %r16d -; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: pushq %rbx # encoding: [0x53] +; CHECK-NEXT: movl %edx, %r16d # encoding: [0xd5,0x10,0x89,0xd0] +; CHECK-NEXT: movl %esi, %edx # encoding: [0x89,0xf2] +; CHECK-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb] ; CHECK-NEXT: #APP -; CHECK-NEXT: nop +; CHECK-NEXT: nop # encoding: [0x90] ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl %r16d, %eax -; CHECK-NEXT: xsave64 (%rbx) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq +; CHECK-NEXT: movl %r16d, %eax # encoding: [0xd5,0x40,0x89,0xc0] +; CHECK-NEXT: xsave64 (%rbx) # encoding: [0x48,0x0f,0xae,0x23] +; CHECK-NEXT: popq %rbx # encoding: [0x5b] +; CHECK-NEXT: retq # encoding: [0xc3] tail call void asm sideeffect "nop", "~{eax},~{ecx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() call void @llvm.x86.xsave64(ptr %ptr, i32 %hi, i32 %lo) ret void; @@ -44,17 +44,17 @@ declare void @llvm.x86.xsave64(ptr, i32, i32) define void @test_xrstor(ptr %ptr, i32 %hi, i32 %lo) nounwind { ; CHECK-LABEL: test_xrstor: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movl %edx, %r16d -; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: pushq %rbx # encoding: [0x53] +; CHECK-NEXT: movl %edx, %r16d # encoding: [0xd5,0x10,0x89,0xd0] +; CHECK-NEXT: movl %esi, %edx # encoding: [0x89,0xf2] +; CHECK-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb] ; CHECK-NEXT: #APP -; CHECK-NEXT: nop +; CHECK-NEXT: nop # encoding: [0x90] ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl %r16d, %eax -; CHECK-NEXT: xrstor (%rbx) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq +; CHECK-NEXT: movl %r16d, %eax # encoding: 
[0xd5,0x40,0x89,0xc0] +; CHECK-NEXT: xrstor (%rbx) # encoding: [0x0f,0xae,0x2b] +; CHECK-NEXT: popq %rbx # encoding: [0x5b] +; CHECK-NEXT: retq # encoding: [0xc3] tail call void asm sideeffect "nop", "~{eax},~{ecx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() call void @llvm.x86.xrstor(ptr %ptr, i32 %hi, i32 %lo) ret void; @@ -64,17 +64,17 @@ declare void @llvm.x86.xrstor(ptr, i32, i32) define void @test_xrstor64(ptr %ptr, i32 %hi, i32 %lo) nounwind { ; CHECK-LABEL: test_xrstor64: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movl %edx, %r16d -; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: pushq %rbx # encoding: [0x53] +; CHECK-NEXT: movl %edx, %r16d # encoding: [0xd5,0x10,0x89,0xd0] +; CHECK-NEXT: movl %esi, %edx # encoding: [0x89,0xf2] +; CHECK-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb] ; CHECK-NEXT: #APP -; CHECK-NEXT: nop +; CHECK-NEXT: nop # encoding: [0x90] ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl %r16d, %eax -; CHECK-NEXT: xrstor64 (%rbx) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq +; CHECK-NEXT: movl %r16d, %eax # encoding: [0xd5,0x40,0x89,0xc0] +; CHECK-NEXT: xrstor64 (%rbx) # encoding: [0x48,0x0f,0xae,0x2b] +; CHECK-NEXT: popq %rbx # encoding: [0x5b] +; CHECK-NEXT: retq # encoding: [0xc3] tail call void asm sideeffect "nop", "~{eax},~{ecx},~{esi},~{edi},~{r8},~{r9},~{r10},~{r11}"() call void @llvm.x86.xrstor64(ptr %ptr, i32 %hi, i32 %lo) ret void; diff --git a/llvm/test/CodeGen/X86/avx512-i386-setallones-pseudo.mir b/llvm/test/CodeGen/X86/avx512-i386-setallones-pseudo.mir new file mode 100644 index 0000000000000..0d8f2177aaa30 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512-i386-setallones-pseudo.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +# RUN: llc %s -mtriple=i386-- -start-before=postrapseudos -o - | FileCheck %s + +--- | + target triple = "i386-unknown-unknown" + + define void @setallones() #0 { + ; CHECK-LABEL: setallones: 
+ ; CHECK: # %bb.0: + ; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 + entry: + unreachable + } + + attributes #0 = { "target-features"="+avx512f,+avx512vl" } +--- +name: setallones +tracksRegLiveness: true +liveins: [] +body: | + bb.0: + $xmm0 = AVX512_128_SETALLONES + $ymm1 = AVX512_256_SETALLONES + +... diff --git a/llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir b/llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir new file mode 100644 index 0000000000000..7e5ddc4cd632f --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir @@ -0,0 +1,30 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +# RUN: llc %s -mtriple=x86_64-- -start-before=postrapseudos -o - | FileCheck %s + +--- | + target triple = "x86_64-unknown-unknown" + + define void @setallones() #0 { + ; CHECK-LABEL: setallones: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vpcmpeqd %xmm14, %xmm14, %xmm14 + ; CHECK-NEXT: vpternlogd {{.*#+}} xmm16 = -1 + ; CHECK-NEXT: vpcmpeqd %ymm15, %ymm15, %ymm15 + ; CHECK-NEXT: vpternlogd {{.*#+}} ymm17 = -1 + entry: + unreachable + } + + attributes #0 = { "target-features"="+avx512f,+avx512vl" } +--- +name: setallones +tracksRegLiveness: true +liveins: [] +body: | + bb.0: + $xmm14 = AVX512_128_SETALLONES + $xmm16 = AVX512_128_SETALLONES + $ymm15 = AVX512_256_SETALLONES + $ymm17 = AVX512_256_SETALLONES + +... 
diff --git a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll index 3243d950740ca..e2400fbe2c4ff 100644 --- a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll +++ b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll @@ -106,7 +106,8 @@ define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) { ; AVX512: # %bb.0: ; AVX512-NEXT: subq $24, %rsp ; AVX512-NEXT: .cfi_def_cfa_offset 32 -; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill ; AVX512-NEXT: callq use.v4.i32@PLT ; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s index f1438532d7c5e..5b4689b2954df 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s @@ -1,8 +1,195 @@ -// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s -v_bfrev_b32_e32 v5, v1 +// INSTS= +// v_bfrev_b32 OPS32 +// v_ceil_f16 OPS16 +// v_ceil_f32 OPS32 +// v_ceil_f64 OPS64 +// v_cls_i32 OPS32 +// v_clz_i32_u32 OPS32 +// v_cos_f16 OPS16 +// v_cos_f32 OPS32 +// v_ctz_i32_b32 OPS32 +// v_cvt_f16_f32 v5.l, SRC32 +// v_cvt_f16_f32 v127.h, 0xaf123456 +// v_cvt_f16_f32 v127.l, 0.5 +// v_cvt_f16_i16 OPS16 +// v_cvt_f16_u16 OPS16 +// v_cvt_f32_f16 OPS_32_16 +// v_cvt_f32_f64 OPS_32_64 +// v_cvt_f32_i32 OPS32 +// v_cvt_f32_u32 OPS32 +// v_cvt_f32_ubyte0 OPS32 +// v_cvt_f32_ubyte1 OPS32 +// v_cvt_f32_ubyte2 OPS32 
+// v_cvt_f32_ubyte3 OPS32 +// v_cvt_f64_f32 OPS_64_32 +// v_cvt_f64_i32 OPS_64_32 +// v_cvt_f64_u32 OPS_64_32 +// v_cvt_floor_i32_f32 OPS32 +// v_cvt_flr_i32_f32 OPS32 +// v_cvt_i16_f16 OPS16 +// v_cvt_i32_f32 OPS32 +// v_cvt_i32_f64 OPS_32_64 +// v_cvt_i32_i16 OPS_32_16 +// v_cvt_nearest_i32_f32 OPS32 +// v_cvt_norm_i16_f16 OPS16 +// v_cvt_norm_u16_f16 OPS16 +// v_cvt_off_f32_i4 v5, SRC32 +// v_cvt_off_f32_i4 v255, 0x4f +// v_cvt_rpi_i32_f32 OPS32 +// v_cvt_u16_f16 OPS16 +// v_cvt_u32_f32 OPS32 +// v_cvt_u32_f64 OPS_32_64 +// v_cvt_u32_u16 OPS_32_16 +// v_exp_f16 OPS16 +// v_exp_f32 OPS32 +// v_ffbh_i32 OPS32 +// v_ffbh_u32 OPS32 +// v_ffbl_b32 OPS32 +// v_floor_f16 OPS16 +// v_floor_f32 OPS32 +// v_floor_f64 OPS64 +// v_fract_f16 OPS16 +// v_fract_f32 OPS32 +// v_fract_f64 OPS64 +// v_frexp_exp_i16_f16 OPS16 +// v_frexp_exp_i32_f32 OPS32 +// v_frexp_exp_i32_f64 OPS_32_64 +// v_frexp_mant_f16 OPS16 +// v_frexp_mant_f32 OPS32 +// v_frexp_mant_f64 OPS64 +// v_log_f16 OPS16 +// v_log_f32 OPS32 +// v_mov_b16_e32 OPS16 +// v_mov_b16_e64 OPS16 +// v_mov_b32 OPS32 +// v_movreld_b32 OPS32 +// v_movrels_b32 v5, v1 +// v_movrels_b32 v255, v255 +// v_movrelsd_2_b32 v5, v1 +// v_movrelsd_2_b32 v255, v255 +// v_movrelsd_b32 v5, v1 +// v_movrelsd_b32 v255, v255 +// v_nop +// v_not_b16 OPS16 +// v_not_b32 OPS32 +// v_permlane64_b32 v5, v1 +// v_permlane64_b32 v255, v255 +// v_pipeflush +// v_rcp_f16 OPS16 +// v_rcp_f32 OPS32 +// v_rcp_f64 OPS64 +// v_rcp_iflag_f32 OPS32 +// v_readfirstlane_b32 s5, v1 +// v_readfirstlane_b32 s105, v1 +// v_readfirstlane_b32 vcc_lo, v1 +// v_readfirstlane_b32 vcc_hi, v1 +// v_readfirstlane_b32 ttmp15, v1 +// v_readfirstlane_b32 null, v255 +// v_rndne_f16 OPS16 +// v_rndne_f32 OPS32 +// v_rndne_f64 OPS64 +// v_rsq_f16 OPS16 +// v_rsq_f32 OPS32 +// v_rsq_f64 OPS64 +// v_sat_pk_u8_i16 v5.l, SRC32 +// v_sat_pk_u8_i16 v127.l, 0xfe0b +// v_sat_pk_u8_i16 v127.l, 0.5 +// v_sat_pk_u8_i16 v5.h, src_scc +// v_sat_pk_u8_i16 v127.h, 0xfe0b +// v_sin_f16 OPS16 
+// v_sin_f32 OPS32 +// v_sqrt_f16 OPS16 +// v_sqrt_f32 OPS32 +// v_sqrt_f64 OPS64 +// v_swap_b16 v5.l, v1.h +// v_swap_b16 v5.h, v1.l +// v_swap_b16 v127.l, v127.l +// v_swap_b32 v5, v1 +// v_swap_b32 v255, v255 +// v_swaprel_b32 v5, v1 +// v_swaprel_b32 v255, v255 +// v_trunc_f16 OPS16 +// v_trunc_f32 OPS32 +// v_trunc_f64 OPS64 +// +// SRC16= +// v1.l +// v127.l +// v1.h +// v127.h +// s1 +// s105 +// vcc_lo +// vcc_hi +// ttmp15 +// m0 +// exec_lo +// exec_hi +// null +// -1 +// 0.5 +// src_scc +// +// OPS16= +// v5.l, SRC16 +// v5.l, 0xfe0b +// v5.h, src_scc +// v127.h, 0xfe0b +// +// SRC32= +// v1 +// v255 +// s1 +// s105 +// vcc_lo +// vcc_hi +// ttmp15 +// m0 +// exec_lo +// exec_hi +// null +// -1 +// 0.5 +// src_scc +// +// OPS32= +// v5, SRC32 +// v255, 0xaf123456 +// +// SRC64= +// v[1:2] +// v[254:255] +// s[2:3] +// s[104:105] +// vcc +// ttmp[14:15] +// exec +// null +// -1 +// 0.5 +// src_scc +// +// OPS64= +// v[5:6], SRC64 +// v[254:255], 0xaf123456 +// +// OPS_32_16= +// v5, SRC16 +// v255, 0xfe0b +// +// OPS_32_64= +// v5, SRC64 +// v255, 0xaf123456 +// +// OPS_64_32= +// v[5:6], SRC32 +// v[254:255], 0xaf123456 + +v_bfrev_b32 v5, v1 // GFX11: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] v_bfrev_b32 v5, v255 @@ -89,8 +276,14 @@ v_ceil_f16 v5.l, null v_ceil_f16 v5.l, -1 // GFX11: v_ceil_f16_e32 v5.l, -1 ; encoding: [0xc1,0xb8,0x0a,0x7e] -v_ceil_f16 v127.l, 0.5 -// GFX11: v_ceil_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xb8,0xfe,0x7e] +v_ceil_f16 v5.l, 0.5 +// GFX11: v_ceil_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xb8,0x0a,0x7e] + +v_ceil_f16 v5.l, src_scc +// GFX11: v_ceil_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xb8,0x0a,0x7e] + +v_ceil_f16 v5.l, 0xfe0b +// GFX11: v_ceil_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xb8,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_ceil_f16 v5.h, src_scc // GFX11: v_ceil_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xb8,0x0a,0x7f] @@ -275,6 +468,12 @@ v_cos_f16 v5.l, v1.l v_cos_f16 v5.l, v127.l // GFX11: v_cos_f16_e32 v5.l, v127.l 
; encoding: [0x7f,0xc3,0x0a,0x7e] +v_cos_f16 v5.l, v1.h +// GFX11: v_cos_f16_e32 v5.l, v1.h ; encoding: [0x81,0xc3,0x0a,0x7e] + +v_cos_f16 v5.l, v127.h +// GFX11: v_cos_f16_e32 v5.l, v127.h ; encoding: [0xff,0xc3,0x0a,0x7e] + v_cos_f16 v5.l, s1 // GFX11: v_cos_f16_e32 v5.l, s1 ; encoding: [0x01,0xc2,0x0a,0x7e] @@ -311,17 +510,8 @@ v_cos_f16 v5.l, 0.5 v_cos_f16 v5.l, src_scc // GFX11: v_cos_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xc2,0x0a,0x7e] -v_cos_f16 v127.l, 0xfe0b -// GFX11: v_cos_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xc2,0xfe,0x7e,0x0b,0xfe,0x00,0x00] - -v_cos_f16 v5.l, v1.h -// GFX11: v_cos_f16_e32 v5.l, v1.h ; encoding: [0x81,0xc3,0x0a,0x7e] - -v_cos_f16 v5.l, v127.h -// GFX11: v_cos_f16_e32 v5.l, v127.h ; encoding: [0xff,0xc3,0x0a,0x7e] - -v_cos_f16 v127.l, 0.5 -// GFX11: v_cos_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xc2,0xfe,0x7e] +v_cos_f16 v5.l, 0xfe0b +// GFX11: v_cos_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xc2,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_cos_f16 v5.h, src_scc // GFX11: v_cos_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xc2,0x0a,0x7f] @@ -458,8 +648,8 @@ v_cvt_f16_f32 v5.l, -1 v_cvt_f16_f32 v5.l, 0.5 // GFX11: v_cvt_f16_f32_e32 v5.l, 0.5 ; encoding: [0xf0,0x14,0x0a,0x7e] -v_cvt_f16_f32 v5.h, src_scc -// GFX11: v_cvt_f16_f32_e32 v5.h, src_scc ; encoding: [0xfd,0x14,0x0a,0x7f] +v_cvt_f16_f32 v5.l, src_scc +// GFX11: v_cvt_f16_f32_e32 v5.l, src_scc ; encoding: [0xfd,0x14,0x0a,0x7e] v_cvt_f16_f32 v127.h, 0xaf123456 // GFX11: v_cvt_f16_f32_e32 v127.h, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf] @@ -509,12 +699,15 @@ v_cvt_f16_i16 v5.l, null v_cvt_f16_i16 v5.l, -1 // GFX11: v_cvt_f16_i16_e32 v5.l, -1 ; encoding: [0xc1,0xa2,0x0a,0x7e] -v_cvt_f16_i16 v127.l, 0.5 -// GFX11: v_cvt_f16_i16_e32 v127.l, 0.5 ; encoding: [0xf0,0xa2,0xfe,0x7e] - v_cvt_f16_i16 v5.l, 0.5 // GFX11: v_cvt_f16_i16_e32 v5.l, 0.5 ; encoding: [0xf0,0xa2,0x0a,0x7e] +v_cvt_f16_i16 v5.l, src_scc +// GFX11: v_cvt_f16_i16_e32 v5.l, src_scc ; encoding: 
[0xfd,0xa2,0x0a,0x7e] + +v_cvt_f16_i16 v5.l, 0xfe0b +// GFX11: v_cvt_f16_i16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xa2,0x0a,0x7e,0x0b,0xfe,0x00,0x00] + v_cvt_f16_i16 v5.h, src_scc // GFX11: v_cvt_f16_i16_e32 v5.h, src_scc ; encoding: [0xfd,0xa2,0x0a,0x7f] @@ -563,11 +756,14 @@ v_cvt_f16_u16 v5.l, null v_cvt_f16_u16 v5.l, -1 // GFX11: v_cvt_f16_u16_e32 v5.l, -1 ; encoding: [0xc1,0xa0,0x0a,0x7e] -v_cvt_f16_u16 v127.l, 0.5 -// GFX11: v_cvt_f16_u16_e32 v127.l, 0.5 ; encoding: [0xf0,0xa0,0xfe,0x7e] +v_cvt_f16_u16 v5.l, 0.5 +// GFX11: v_cvt_f16_u16_e32 v5.l, 0.5 ; encoding: [0xf0,0xa0,0x0a,0x7e] + +v_cvt_f16_u16 v5.l, src_scc +// GFX11: v_cvt_f16_u16_e32 v5.l, src_scc ; encoding: [0xfd,0xa0,0x0a,0x7e] -v_cvt_f16_u16 v5, 0.5 -// GFX11: v_cvt_f16_u16_e32 v5, 0.5 ; encoding: [0xf0,0xa0,0x0a,0x7e] +v_cvt_f16_u16 v5.l, 0xfe0b +// GFX11: v_cvt_f16_u16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xa0,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_cvt_f16_u16 v5.h, src_scc // GFX11: v_cvt_f16_u16_e32 v5.h, src_scc ; encoding: [0xfd,0xa0,0x0a,0x7f] @@ -1199,8 +1395,14 @@ v_cvt_i16_f16 v5.l, null v_cvt_i16_f16 v5.l, -1 // GFX11: v_cvt_i16_f16_e32 v5.l, -1 ; encoding: [0xc1,0xa6,0x0a,0x7e] -v_cvt_i16_f16 v127.l, 0.5 -// GFX11: v_cvt_i16_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xa6,0xfe,0x7e] +v_cvt_i16_f16 v5.l, 0.5 +// GFX11: v_cvt_i16_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5.l, src_scc +// GFX11: v_cvt_i16_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xa6,0x0a,0x7e] + +v_cvt_i16_f16 v5.l, 0xfe0b +// GFX11: v_cvt_i16_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xa6,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_cvt_i16_f16 v5.h, src_scc // GFX11: v_cvt_i16_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xa6,0x0a,0x7f] @@ -1295,6 +1497,12 @@ v_cvt_i32_i16 v5, v1.l v_cvt_i32_i16 v5, v127.l // GFX11: v_cvt_i32_i16_e32 v5, v127.l ; encoding: [0x7f,0xd5,0x0a,0x7e] +v_cvt_i32_i16 v5, v1.h +// GFX11: v_cvt_i32_i16_e32 v5, v1.h ; encoding: [0x81,0xd5,0x0a,0x7e] + +v_cvt_i32_i16 v5, v127.h +// GFX11: 
v_cvt_i32_i16_e32 v5, v127.h ; encoding: [0xff,0xd5,0x0a,0x7e] + v_cvt_i32_i16 v5, s1 // GFX11: v_cvt_i32_i16_e32 v5, s1 ; encoding: [0x01,0xd4,0x0a,0x7e] @@ -1334,12 +1542,6 @@ v_cvt_i32_i16 v5, src_scc v_cvt_i32_i16 v255, 0xfe0b // GFX11: v_cvt_i32_i16_e32 v255, 0xfe0b ; encoding: [0xff,0xd4,0xfe,0x7f,0x0b,0xfe,0x00,0x00] -v_cvt_i32_i16 v5, v1.h -// GFX11: v_cvt_i32_i16_e32 v5, v1.h ; encoding: [0x81,0xd5,0x0a,0x7e] - -v_cvt_i32_i16 v5, v127.h -// GFX11: v_cvt_i32_i16_e32 v5, v127.h ; encoding: [0xff,0xd5,0x0a,0x7e] - v_cvt_nearest_i32_f32 v5, v1 // GFX11: v_cvt_nearest_i32_f32_e32 v5, v1 ; encoding: [0x01,0x19,0x0a,0x7e] @@ -1427,8 +1629,14 @@ v_cvt_norm_i16_f16 v5.l, null v_cvt_norm_i16_f16 v5.l, -1 // GFX11: v_cvt_norm_i16_f16_e32 v5.l, -1 ; encoding: [0xc1,0xc6,0x0a,0x7e] -v_cvt_norm_i16_f16 v127.l, 0.5 -// GFX11: v_cvt_norm_i16_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xc6,0xfe,0x7e] +v_cvt_norm_i16_f16 v5.l, 0.5 +// GFX11: v_cvt_norm_i16_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5.l, src_scc +// GFX11: v_cvt_norm_i16_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xc6,0x0a,0x7e] + +v_cvt_norm_i16_f16 v5.l, 0xfe0b +// GFX11: v_cvt_norm_i16_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xc6,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_cvt_norm_i16_f16 v5.h, src_scc // GFX11: v_cvt_norm_i16_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xc6,0x0a,0x7f] @@ -1478,8 +1686,14 @@ v_cvt_norm_u16_f16 v5.l, null v_cvt_norm_u16_f16 v5.l, -1 // GFX11: v_cvt_norm_u16_f16_e32 v5.l, -1 ; encoding: [0xc1,0xc8,0x0a,0x7e] -v_cvt_norm_u16_f16 v127.l, 0.5 -// GFX11: v_cvt_norm_u16_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xc8,0xfe,0x7e] +v_cvt_norm_u16_f16 v5.l, 0.5 +// GFX11: v_cvt_norm_u16_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5.l, src_scc +// GFX11: v_cvt_norm_u16_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xc8,0x0a,0x7e] + +v_cvt_norm_u16_f16 v5.l, 0xfe0b +// GFX11: v_cvt_norm_u16_f16_e32 v5.l, 0xfe0b ; encoding: 
[0xff,0xc8,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_cvt_norm_u16_f16 v5.h, src_scc // GFX11: v_cvt_norm_u16_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xc8,0x0a,0x7f] @@ -1619,8 +1833,14 @@ v_cvt_u16_f16 v5.l, null v_cvt_u16_f16 v5.l, -1 // GFX11: v_cvt_u16_f16_e32 v5.l, -1 ; encoding: [0xc1,0xa4,0x0a,0x7e] -v_cvt_u16_f16 v127.l, 0.5 -// GFX11: v_cvt_u16_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xa4,0xfe,0x7e] +v_cvt_u16_f16 v5.l, 0.5 +// GFX11: v_cvt_u16_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5.l, src_scc +// GFX11: v_cvt_u16_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xa4,0x0a,0x7e] + +v_cvt_u16_f16 v5.l, 0xfe0b +// GFX11: v_cvt_u16_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xa4,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_cvt_u16_f16 v5.h, src_scc // GFX11: v_cvt_u16_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xa4,0x0a,0x7f] @@ -1715,6 +1935,12 @@ v_cvt_u32_u16 v5, v1.l v_cvt_u32_u16 v5, v127.l // GFX11: v_cvt_u32_u16_e32 v5, v127.l ; encoding: [0x7f,0xd7,0x0a,0x7e] +v_cvt_u32_u16 v5, v1.h +// GFX11: v_cvt_u32_u16_e32 v5, v1.h ; encoding: [0x81,0xd7,0x0a,0x7e] + +v_cvt_u32_u16 v5, v127.h +// GFX11: v_cvt_u32_u16_e32 v5, v127.h ; encoding: [0xff,0xd7,0x0a,0x7e] + v_cvt_u32_u16 v5, s1 // GFX11: v_cvt_u32_u16_e32 v5, s1 ; encoding: [0x01,0xd6,0x0a,0x7e] @@ -1754,12 +1980,6 @@ v_cvt_u32_u16 v5, src_scc v_cvt_u32_u16 v255, 0xfe0b // GFX11: v_cvt_u32_u16_e32 v255, 0xfe0b ; encoding: [0xff,0xd6,0xfe,0x7f,0x0b,0xfe,0x00,0x00] -v_cvt_u32_u16 v5, v1.h -// GFX11: v_cvt_u32_u16_e32 v5, v1.h ; encoding: [0x81,0xd7,0x0a,0x7e] - -v_cvt_u32_u16 v5, v127.h -// GFX11: v_cvt_u32_u16_e32 v5, v127.h ; encoding: [0xff,0xd7,0x0a,0x7e] - v_exp_f16 v5.l, v1.l // GFX11: v_exp_f16_e32 v5.l, v1.l ; encoding: [0x01,0xb1,0x0a,0x7e] @@ -1802,8 +2022,14 @@ v_exp_f16 v5.l, null v_exp_f16 v5.l, -1 // GFX11: v_exp_f16_e32 v5.l, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e] -v_exp_f16 v127.l, 0.5 -// GFX11: v_exp_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xb0,0xfe,0x7e] +v_exp_f16 v5.l, 0.5 +// GFX11: 
v_exp_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e] + +v_exp_f16 v5.l, src_scc +// GFX11: v_exp_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e] + +v_exp_f16 v5.l, 0xfe0b +// GFX11: v_exp_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xb0,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_exp_f16 v5.h, src_scc // GFX11: v_exp_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7f] @@ -2033,8 +2259,14 @@ v_floor_f16 v5.l, null v_floor_f16 v5.l, -1 // GFX11: v_floor_f16_e32 v5.l, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] -v_floor_f16 v127.l, 0.5 -// GFX11: v_floor_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xb6,0xfe,0x7e] +v_floor_f16 v5.l, 0.5 +// GFX11: v_floor_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e] + +v_floor_f16 v5.l, src_scc +// GFX11: v_floor_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e] + +v_floor_f16 v5.l, 0xfe0b +// GFX11: v_floor_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xb6,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_floor_f16 v5.h, src_scc // GFX11: v_floor_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7f] @@ -2129,6 +2361,12 @@ v_fract_f16 v5.l, v1.l v_fract_f16 v5.l, v127.l // GFX11: v_fract_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xbf,0x0a,0x7e] +v_fract_f16 v5.l, v1.h +// GFX11: v_fract_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbf,0x0a,0x7e] + +v_fract_f16 v5.l, v127.h +// GFX11: v_fract_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbf,0x0a,0x7e] + v_fract_f16 v5.l, s1 // GFX11: v_fract_f16_e32 v5.l, s1 ; encoding: [0x01,0xbe,0x0a,0x7e] @@ -2165,17 +2403,8 @@ v_fract_f16 v5.l, 0.5 v_fract_f16 v5.l, src_scc // GFX11: v_fract_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xbe,0x0a,0x7e] -v_fract_f16 v127.l, 0xfe0b -// GFX11: v_fract_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xbe,0xfe,0x7e,0x0b,0xfe,0x00,0x00] - -v_fract_f16 v5.l, v1.h -// GFX11: v_fract_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbf,0x0a,0x7e] - -v_fract_f16 v5.l, v127.h -// GFX11: v_fract_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbf,0x0a,0x7e] - -v_fract_f16 v127.l, 0.5 -// GFX11: v_fract_f16_e32 v127.l, 0.5 ; 
encoding: [0xf0,0xbe,0xfe,0x7e] +v_fract_f16 v5.l, 0xfe0b +// GFX11: v_fract_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xbe,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_fract_f16 v5.h, src_scc // GFX11: v_fract_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xbe,0x0a,0x7f] @@ -2306,8 +2535,14 @@ v_frexp_exp_i16_f16 v5.l, null v_frexp_exp_i16_f16 v5.l, -1 // GFX11: v_frexp_exp_i16_f16_e32 v5.l, -1 ; encoding: [0xc1,0xb4,0x0a,0x7e] -v_frexp_exp_i16_f16 v127.l, 0.5 -// GFX11: v_frexp_exp_i16_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xb4,0xfe,0x7e] +v_frexp_exp_i16_f16 v5.l, 0.5 +// GFX11: v_frexp_exp_i16_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5.l, src_scc +// GFX11: v_frexp_exp_i16_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xb4,0x0a,0x7e] + +v_frexp_exp_i16_f16 v5.l, 0xfe0b +// GFX11: v_frexp_exp_i16_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xb4,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_frexp_exp_i16_f16 v5.h, src_scc // GFX11: v_frexp_exp_i16_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xb4,0x0a,0x7f] @@ -2402,6 +2637,12 @@ v_frexp_mant_f16 v5.l, v1.l v_frexp_mant_f16 v5.l, v127.l // GFX11: v_frexp_mant_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xb3,0x0a,0x7e] +v_frexp_mant_f16 v5.l, v1.h +// GFX11: v_frexp_mant_f16_e32 v5.l, v1.h ; encoding: [0x81,0xb3,0x0a,0x7e] + +v_frexp_mant_f16 v5.l, v127.h +// GFX11: v_frexp_mant_f16_e32 v5.l, v127.h ; encoding: [0xff,0xb3,0x0a,0x7e] + v_frexp_mant_f16 v5.l, s1 // GFX11: v_frexp_mant_f16_e32 v5.l, s1 ; encoding: [0x01,0xb2,0x0a,0x7e] @@ -2438,17 +2679,8 @@ v_frexp_mant_f16 v5.l, 0.5 v_frexp_mant_f16 v5.l, src_scc // GFX11: v_frexp_mant_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xb2,0x0a,0x7e] -v_frexp_mant_f16 v127.l, 0xfe0b -// GFX11: v_frexp_mant_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xb2,0xfe,0x7e,0x0b,0xfe,0x00,0x00] - -v_frexp_mant_f16 v5.l, v1.h -// GFX11: v_frexp_mant_f16_e32 v5.l, v1.h ; encoding: [0x81,0xb3,0x0a,0x7e] - -v_frexp_mant_f16 v5.l, v127.h -// GFX11: v_frexp_mant_f16_e32 v5.l, v127.h ; encoding: 
[0xff,0xb3,0x0a,0x7e] - -v_frexp_mant_f16 v127.l, 0.5 -// GFX11: v_frexp_mant_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xb2,0xfe,0x7e] +v_frexp_mant_f16 v5.l, 0xfe0b +// GFX11: v_frexp_mant_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xb2,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_frexp_mant_f16 v5.h, src_scc // GFX11: v_frexp_mant_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xb2,0x0a,0x7f] @@ -2579,8 +2811,14 @@ v_log_f16 v5.l, null v_log_f16 v5.l, -1 // GFX11: v_log_f16_e32 v5.l, -1 ; encoding: [0xc1,0xae,0x0a,0x7e] -v_log_f16 v127.l, 0.5 -// GFX11: v_log_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xae,0xfe,0x7e] +v_log_f16 v5.l, 0.5 +// GFX11: v_log_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xae,0x0a,0x7e] + +v_log_f16 v5.l, src_scc +// GFX11: v_log_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xae,0x0a,0x7e] + +v_log_f16 v5.l, 0xfe0b +// GFX11: v_log_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xae,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_log_f16 v5.h, src_scc // GFX11: v_log_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xae,0x0a,0x7f] @@ -2633,35 +2871,119 @@ v_log_f32 v5, src_scc v_log_f32 v255, 0xaf123456 // GFX11: v_log_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x4e,0xfe,0x7f,0x56,0x34,0x12,0xaf] -v_mov_b16_e32 v0.l, v1.l -// GFX11: v_mov_b16_e32 v0.l, v1.l ; encoding: [0x01,0x39,0x00,0x7e] +v_mov_b16_e32 v5.l, v1.l +// GFX11: v_mov_b16_e32 v5.l, v1.l ; encoding: [0x01,0x39,0x0a,0x7e] + +v_mov_b16_e32 v5.l, v127.l +// GFX11: v_mov_b16_e32 v5.l, v127.l ; encoding: [0x7f,0x39,0x0a,0x7e] + +v_mov_b16_e32 v5.l, v1.h +// GFX11: v_mov_b16_e32 v5.l, v1.h ; encoding: [0x81,0x39,0x0a,0x7e] + +v_mov_b16_e32 v5.l, v127.h +// GFX11: v_mov_b16_e32 v5.l, v127.h ; encoding: [0xff,0x39,0x0a,0x7e] -v_mov_b16_e32 v0.l, s1 -// GFX11: v_mov_b16_e32 v0.l, s1 ; encoding: [0x01,0x38,0x00,0x7e] +v_mov_b16_e32 v5.l, s1 +// GFX11: v_mov_b16_e32 v5.l, s1 ; encoding: [0x01,0x38,0x0a,0x7e] -v_mov_b16_e32 v0.h, 0 -// GFX11: v_mov_b16_e32 v0.h, 0 ; encoding: [0x80,0x38,0x00,0x7f] +v_mov_b16_e32 v5.l, s105 +// GFX11: v_mov_b16_e32 v5.l, s105 
; encoding: [0x69,0x38,0x0a,0x7e] -v_mov_b16_e32 v0.h, 1.0 -// GFX11: v_mov_b16_e32 v0.h, 1.0 ; encoding: [0xf2,0x38,0x00,0x7f] +v_mov_b16_e32 v5.l, vcc_lo +// GFX11: v_mov_b16_e32 v5.l, vcc_lo ; encoding: [0x6a,0x38,0x0a,0x7e] -v_mov_b16_e32 v0.l, 0x1234 -// GFX11: v_mov_b16_e32 v0.l, 0x1234 ; encoding: [0xff,0x38,0x00,0x7e,0x34,0x12,0x00,0x00] +v_mov_b16_e32 v5.l, vcc_hi +// GFX11: v_mov_b16_e32 v5.l, vcc_hi ; encoding: [0x6b,0x38,0x0a,0x7e] -v_mov_b16_e64 v0.l, v1.l -// GFX11: v_mov_b16_e64 v0.l, v1.l ; encoding: [0x00,0x00,0x9c,0xd5,0x01,0x01,0x00,0x00] +v_mov_b16_e32 v5.l, ttmp15 +// GFX11: v_mov_b16_e32 v5.l, ttmp15 ; encoding: [0x7b,0x38,0x0a,0x7e] -v_mov_b16_e64 v200.l, v1.h -// GFX11: v_mov_b16_e64 v200.l, v1.h op_sel:[1,0] ; encoding: [0xc8,0x08,0x9c,0xd5,0x01,0x01,0x00,0x00] +v_mov_b16_e32 v5.l, m0 +// GFX11: v_mov_b16_e32 v5.l, m0 ; encoding: [0x7d,0x38,0x0a,0x7e] -v_mov_b16_e64 v0.l, s1 -// GFX11: v_mov_b16_e64 v0.l, s1 ; encoding: [0x00,0x00,0x9c,0xd5,0x01,0x00,0x00,0x00] +v_mov_b16_e32 v5.l, exec_lo +// GFX11: v_mov_b16_e32 v5.l, exec_lo ; encoding: [0x7e,0x38,0x0a,0x7e] -v_mov_b16_e64 v200.h, 1 -// GFX11: v_mov_b16_e64 v200.h, 1 op_sel:[0,1] ; encoding: [0xc8,0x40,0x9c,0xd5,0x81,0x00,0x00,0x00] +v_mov_b16_e32 v5.l, exec_hi +// GFX11: v_mov_b16_e32 v5.l, exec_hi ; encoding: [0x7f,0x38,0x0a,0x7e] -v_mov_b16_e64 v0.l, 0x1234 -// GFX11: v_mov_b16_e64 v0.l, 0x1234 ; encoding: [0x00,0x00,0x9c,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] +v_mov_b16_e32 v5.l, null +// GFX11: v_mov_b16_e32 v5.l, null ; encoding: [0x7c,0x38,0x0a,0x7e] + +v_mov_b16_e32 v5.l, -1 +// GFX11: v_mov_b16_e32 v5.l, -1 ; encoding: [0xc1,0x38,0x0a,0x7e] + +v_mov_b16_e32 v5.l, 0.5 +// GFX11: v_mov_b16_e32 v5.l, 0.5 ; encoding: [0xf0,0x38,0x0a,0x7e] + +v_mov_b16_e32 v5.l, src_scc +// GFX11: v_mov_b16_e32 v5.l, src_scc ; encoding: [0xfd,0x38,0x0a,0x7e] + +v_mov_b16_e32 v5.l, 0xfe0b +// GFX11: v_mov_b16_e32 v5.l, 0xfe0b ; encoding: [0xff,0x38,0x0a,0x7e,0x0b,0xfe,0x00,0x00] + 
+v_mov_b16_e32 v5.h, src_scc +// GFX11: v_mov_b16_e32 v5.h, src_scc ; encoding: [0xfd,0x38,0x0a,0x7f] + +v_mov_b16_e32 v127.h, 0xfe0b +// GFX11: v_mov_b16_e32 v127.h, 0xfe0b ; encoding: [0xff,0x38,0xfe,0x7f,0x0b,0xfe,0x00,0x00] + +v_mov_b16_e64 v5.l, v1.l +// GFX11: v_mov_b16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0x9c,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b16_e64 v5.l, v127.l +// GFX11: v_mov_b16_e64 v5.l, v127.l ; encoding: [0x05,0x00,0x9c,0xd5,0x7f,0x01,0x00,0x00] + +v_mov_b16_e64 v5.l, v1.h +// GFX11: v_mov_b16_e64 v5.l, v1.h op_sel:[1,0] ; encoding: [0x05,0x08,0x9c,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b16_e64 v5.l, v127.h +// GFX11: v_mov_b16_e64 v5.l, v127.h op_sel:[1,0] ; encoding: [0x05,0x08,0x9c,0xd5,0x7f,0x01,0x00,0x00] + +v_mov_b16_e64 v5.l, s1 +// GFX11: v_mov_b16_e64 v5.l, s1 ; encoding: [0x05,0x00,0x9c,0xd5,0x01,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, s105 +// GFX11: v_mov_b16_e64 v5.l, s105 ; encoding: [0x05,0x00,0x9c,0xd5,0x69,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, vcc_lo +// GFX11: v_mov_b16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0x9c,0xd5,0x6a,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, vcc_hi +// GFX11: v_mov_b16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0x9c,0xd5,0x6b,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, ttmp15 +// GFX11: v_mov_b16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0x9c,0xd5,0x7b,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, m0 +// GFX11: v_mov_b16_e64 v5.l, m0 ; encoding: [0x05,0x00,0x9c,0xd5,0x7d,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, exec_lo +// GFX11: v_mov_b16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0x9c,0xd5,0x7e,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, exec_hi +// GFX11: v_mov_b16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0x9c,0xd5,0x7f,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, null +// GFX11: v_mov_b16_e64 v5.l, null ; encoding: [0x05,0x00,0x9c,0xd5,0x7c,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, -1 +// GFX11: v_mov_b16_e64 v5.l, -1 ; encoding: [0x05,0x00,0x9c,0xd5,0xc1,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, 0.5 +// GFX11: v_mov_b16_e64 v5.l, 0.5 ; encoding: 
[0x05,0x00,0x9c,0xd5,0xf0,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, src_scc +// GFX11: v_mov_b16_e64 v5.l, src_scc ; encoding: [0x05,0x00,0x9c,0xd5,0xfd,0x00,0x00,0x00] + +v_mov_b16_e64 v5.l, 0xfe0b +// GFX11: v_mov_b16_e64 v5.l, 0xfe0b ; encoding: [0x05,0x00,0x9c,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_mov_b16_e64 v5.h, src_scc +// GFX11: v_mov_b16_e64 v5.h, src_scc op_sel:[0,1] ; encoding: [0x05,0x40,0x9c,0xd5,0xfd,0x00,0x00,0x00] + +v_mov_b16_e64 v127.h, 0xfe0b +// GFX11: v_mov_b16_e64 v127.h, 0xfe0b op_sel:[0,1] ; encoding: [0x7f,0x40,0x9c,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] v_mov_b32 v5, v1 // GFX11: v_mov_b32_e32 v5, v1 ; encoding: [0x01,0x03,0x0a,0x7e] @@ -2780,6 +3102,12 @@ v_not_b16 v5.l, v1.l v_not_b16 v5.l, v127.l // GFX11: v_not_b16_e32 v5.l, v127.l ; encoding: [0x7f,0xd3,0x0a,0x7e] +v_not_b16 v5.l, v1.h +// GFX11: v_not_b16_e32 v5.l, v1.h ; encoding: [0x81,0xd3,0x0a,0x7e] + +v_not_b16 v5.l, v127.h +// GFX11: v_not_b16_e32 v5.l, v127.h ; encoding: [0xff,0xd3,0x0a,0x7e] + v_not_b16 v5.l, s1 // GFX11: v_not_b16_e32 v5.l, s1 ; encoding: [0x01,0xd2,0x0a,0x7e] @@ -2816,17 +3144,8 @@ v_not_b16 v5.l, 0.5 v_not_b16 v5.l, src_scc // GFX11: v_not_b16_e32 v5.l, src_scc ; encoding: [0xfd,0xd2,0x0a,0x7e] -v_not_b16 v127.l, 0xfe0b -// GFX11: v_not_b16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xd2,0xfe,0x7e,0x0b,0xfe,0x00,0x00] - -v_not_b16 v5.l, v1.h -// GFX11: v_not_b16_e32 v5.l, v1.h ; encoding: [0x81,0xd3,0x0a,0x7e] - -v_not_b16 v5.l, v127.h -// GFX11: v_not_b16_e32 v5.l, v127.h ; encoding: [0xff,0xd3,0x0a,0x7e] - -v_not_b16 v127.l, 0.5 -// GFX11: v_not_b16_e32 v127.l, 0.5 ; encoding: [0xf0,0xd2,0xfe,0x7e] +v_not_b16 v5.l, 0xfe0b +// GFX11: v_not_b16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xd2,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_not_b16 v5.h, src_scc // GFX11: v_not_b16_e32 v5.h, src_scc ; encoding: [0xfd,0xd2,0x0a,0x7f] @@ -2930,8 +3249,14 @@ v_rcp_f16 v5.l, null v_rcp_f16 v5.l, -1 // GFX11: v_rcp_f16_e32 v5.l, -1 ; encoding: [0xc1,0xa8,0x0a,0x7e] 
-v_rcp_f16 v127.l, 0.5 -// GFX11: v_rcp_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xa8,0xfe,0x7e] +v_rcp_f16 v5.l, 0.5 +// GFX11: v_rcp_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xa8,0x0a,0x7e] + +v_rcp_f16 v5.l, src_scc +// GFX11: v_rcp_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xa8,0x0a,0x7e] + +v_rcp_f16 v5.l, 0xfe0b +// GFX11: v_rcp_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xa8,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_rcp_f16 v5.h, src_scc // GFX11: v_rcp_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xa8,0x0a,0x7f] @@ -3089,6 +3414,12 @@ v_rndne_f16 v5.l, v1.l v_rndne_f16 v5.l, v127.l // GFX11: v_rndne_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xbd,0x0a,0x7e] +v_rndne_f16 v5.l, v1.h +// GFX11: v_rndne_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbd,0x0a,0x7e] + +v_rndne_f16 v5.l, v127.h +// GFX11: v_rndne_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbd,0x0a,0x7e] + v_rndne_f16 v5.l, s1 // GFX11: v_rndne_f16_e32 v5.l, s1 ; encoding: [0x01,0xbc,0x0a,0x7e] @@ -3125,17 +3456,8 @@ v_rndne_f16 v5.l, 0.5 v_rndne_f16 v5.l, src_scc // GFX11: v_rndne_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xbc,0x0a,0x7e] -v_rndne_f16 v127.l, 0xfe0b -// GFX11: v_rndne_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xbc,0xfe,0x7e,0x0b,0xfe,0x00,0x00] - -v_rndne_f16 v5.l, v1.h -// GFX11: v_rndne_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbd,0x0a,0x7e] - -v_rndne_f16 v5.l, v127.h -// GFX11: v_rndne_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbd,0x0a,0x7e] - -v_rndne_f16 v127.l, 0.5 -// GFX11: v_rndne_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xbc,0xfe,0x7e] +v_rndne_f16 v5.l, 0xfe0b +// GFX11: v_rndne_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xbc,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_rndne_f16 v5.h, src_scc // GFX11: v_rndne_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xbc,0x0a,0x7f] @@ -3266,8 +3588,14 @@ v_rsq_f16 v5.l, null v_rsq_f16 v5.l, -1 // GFX11: v_rsq_f16_e32 v5.l, -1 ; encoding: [0xc1,0xac,0x0a,0x7e] -v_rsq_f16 v127.l, 0.5 -// GFX11: v_rsq_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xac,0xfe,0x7e] +v_rsq_f16 v5.l, 0.5 +// GFX11: v_rsq_f16_e32 v5.l, 
0.5 ; encoding: [0xf0,0xac,0x0a,0x7e] + +v_rsq_f16 v5.l, src_scc +// GFX11: v_rsq_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xac,0x0a,0x7e] + +v_rsq_f16 v5.l, 0xfe0b +// GFX11: v_rsq_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xac,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_rsq_f16 v5.h, src_scc // GFX11: v_rsq_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xac,0x0a,0x7f] @@ -3416,6 +3744,12 @@ v_sin_f16 v5.l, v1.l v_sin_f16 v5.l, v127.l // GFX11: v_sin_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xc1,0x0a,0x7e] +v_sin_f16 v5.l, v1.h +// GFX11: v_sin_f16_e32 v5.l, v1.h ; encoding: [0x81,0xc1,0x0a,0x7e] + +v_sin_f16 v5.l, v127.h +// GFX11: v_sin_f16_e32 v5.l, v127.h ; encoding: [0xff,0xc1,0x0a,0x7e] + v_sin_f16 v5.l, s1 // GFX11: v_sin_f16_e32 v5.l, s1 ; encoding: [0x01,0xc0,0x0a,0x7e] @@ -3452,17 +3786,8 @@ v_sin_f16 v5.l, 0.5 v_sin_f16 v5.l, src_scc // GFX11: v_sin_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xc0,0x0a,0x7e] -v_sin_f16 v127.l, 0xfe0b -// GFX11: v_sin_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xc0,0xfe,0x7e,0x0b,0xfe,0x00,0x00] - -v_sin_f16 v5.l, v1.h -// GFX11: v_sin_f16_e32 v5.l, v1.h ; encoding: [0x81,0xc1,0x0a,0x7e] - -v_sin_f16 v5.l, v127.h -// GFX11: v_sin_f16_e32 v5.l, v127.h ; encoding: [0xff,0xc1,0x0a,0x7e] - -v_sin_f16 v127.l, 0.5 -// GFX11: v_sin_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xc0,0xfe,0x7e] +v_sin_f16 v5.l, 0xfe0b +// GFX11: v_sin_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xc0,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_sin_f16 v5.h, src_scc // GFX11: v_sin_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xc0,0x0a,0x7f] @@ -3557,8 +3882,14 @@ v_sqrt_f16 v5.l, null v_sqrt_f16 v5.l, -1 // GFX11: v_sqrt_f16_e32 v5.l, -1 ; encoding: [0xc1,0xaa,0x0a,0x7e] -v_sqrt_f16 v127.l, 0.5 -// GFX11: v_sqrt_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xaa,0xfe,0x7e] +v_sqrt_f16 v5.l, 0.5 +// GFX11: v_sqrt_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5.l, src_scc +// GFX11: v_sqrt_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xaa,0x0a,0x7e] + +v_sqrt_f16 v5.l, 0xfe0b +// GFX11: 
v_sqrt_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xaa,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_sqrt_f16 v5.h, src_scc // GFX11: v_sqrt_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xaa,0x0a,0x7f] @@ -3674,6 +4005,12 @@ v_trunc_f16 v5.l, v1.l v_trunc_f16 v5.l, v127.l // GFX11: v_trunc_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xbb,0x0a,0x7e] +v_trunc_f16 v5.l, v1.h +// GFX11: v_trunc_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbb,0x0a,0x7e] + +v_trunc_f16 v5.l, v127.h +// GFX11: v_trunc_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbb,0x0a,0x7e] + v_trunc_f16 v5.l, s1 // GFX11: v_trunc_f16_e32 v5.l, s1 ; encoding: [0x01,0xba,0x0a,0x7e] @@ -3710,17 +4047,8 @@ v_trunc_f16 v5.l, 0.5 v_trunc_f16 v5.l, src_scc // GFX11: v_trunc_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xba,0x0a,0x7e] -v_trunc_f16 v127.l, 0xfe0b -// GFX11: v_trunc_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xba,0xfe,0x7e,0x0b,0xfe,0x00,0x00] - -v_trunc_f16 v5.l, v1.h -// GFX11: v_trunc_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbb,0x0a,0x7e] - -v_trunc_f16 v5.l, v127.h -// GFX11: v_trunc_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbb,0x0a,0x7e] - -v_trunc_f16 v127.l, 0.5 -// GFX11: v_trunc_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xba,0xfe,0x7e] +v_trunc_f16 v5.l, 0xfe0b +// GFX11: v_trunc_f16_e32 v5.l, 0xfe0b ; encoding: [0xff,0xba,0x0a,0x7e,0x0b,0xfe,0x00,0x00] v_trunc_f16 v5.h, src_scc // GFX11: v_trunc_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xba,0x0a,0x7f] @@ -3808,9 +4136,3 @@ v_trunc_f64 v[5:6], src_scc v_trunc_f64 v[254:255], 0xaf123456 // GFX11: v_trunc_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x2e,0xfc,0x7f,0x56,0x34,0x12,0xaf] - -v_trunc_f16 v[5].l, v[1].h -// GFX11: v_trunc_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbb,0x0a,0x7e] - -v_trunc_f16 v[5:5].l, v[1:1].h -// GFX11: v_trunc_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbb,0x0a,0x7e] diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index 5bff2a2e6b208..246a8c42ba912 100644 --- 
a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -555,12 +555,10 @@ cont2: ; *ptr = 4; ; } ; } -; -; FIXME: %ptr should be dereferenceable(4) define dso_local void @rec-branch-1(i32 %a, i32 %b, i32 %c, ptr %ptr) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@rec-branch-1 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nofree writeonly captures(none) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] @@ -630,11 +628,10 @@ if.end8: ; preds = %if.then5, %if.else6 ; rec-branch-2(1, 1, 1, ptr); ; } ; } -; FIXME: %ptr should be dereferenceable(4) define dso_local void @rec-branch-2(i32 %a, i32 %b, i32 %c, ptr %ptr) { ; CHECK: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@rec-branch-2 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nofree writeonly captures(none) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] @@ -654,7 +651,7 @@ define dso_local void @rec-branch-2(i32 %a, i32 %b, i32 %c, ptr %ptr) { ; CHECK-NEXT: store i32 3, ptr [[PTR]], align 4 ; CHECK-NEXT: br label [[IF_END8]] ; CHECK: if.else6: -; CHECK-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, ptr nofree writeonly captures(none) [[PTR]]) #[[ATTR8:[0-9]+]] +; 
CHECK-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(4) [[PTR]]) #[[ATTR8:[0-9]+]] ; CHECK-NEXT: br label [[IF_END8]] ; CHECK: if.end8: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index 2ff8a3fa3a688..57a6d09af64fa 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -32,16 +32,27 @@ define ptr @test2(ptr nonnull %p) { } define ptr @test2A(i1 %c, ptr %ret) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) -; CHECK-LABEL: define {{[^@]+}}@test2A -; CHECK-SAME: (i1 noundef [[C:%.*]], ptr nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] -; CHECK: A: -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16:[0-9]+]] [ "nonnull"(ptr [[RET]]) ] -; CHECK-NEXT: ret ptr [[RET]] -; CHECK: B: -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16]] [ "nonnull"(ptr [[RET]]) ] -; CHECK-NEXT: ret ptr [[RET]] +; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) +; TUNIT-LABEL: define {{[^@]+}}@test2A +; TUNIT-SAME: (i1 noundef [[C:%.*]], ptr nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2:[0-9]+]] { +; TUNIT-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; TUNIT: A: +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR15:[0-9]+]] [ "nonnull"(ptr [[RET]]) ] +; TUNIT-NEXT: ret ptr [[RET]] +; TUNIT: B: +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR15]] [ "nonnull"(ptr [[RET]]) ] +; TUNIT-NEXT: ret ptr [[RET]] +; +; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) +; CGSCC-LABEL: 
define {{[^@]+}}@test2A +; CGSCC-SAME: (i1 noundef [[C:%.*]], ptr nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2:[0-9]+]] { +; CGSCC-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; CGSCC: A: +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16:[0-9]+]] [ "nonnull"(ptr [[RET]]) ] +; CGSCC-NEXT: ret ptr [[RET]] +; CGSCC: B: +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16]] [ "nonnull"(ptr [[RET]]) ] +; CGSCC-NEXT: ret ptr [[RET]] ; br i1 %c, label %A, label %B A: @@ -53,16 +64,27 @@ B: } define ptr @test2B(i1 %c, ptr %ret) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) -; CHECK-LABEL: define {{[^@]+}}@test2B -; CHECK-SAME: (i1 noundef [[C:%.*]], ptr nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] -; CHECK: A: -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16]] [ "dereferenceable"(ptr [[RET]], i32 4) ] -; CHECK-NEXT: ret ptr [[RET]] -; CHECK: B: -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16]] [ "dereferenceable"(ptr [[RET]], i32 4) ] -; CHECK-NEXT: ret ptr [[RET]] +; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) +; TUNIT-LABEL: define {{[^@]+}}@test2B +; TUNIT-SAME: (i1 noundef [[C:%.*]], ptr nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; TUNIT: A: +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR15]] [ "dereferenceable"(ptr [[RET]], i32 4) ] +; TUNIT-NEXT: ret ptr [[RET]] +; TUNIT: B: +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR15]] [ "dereferenceable"(ptr [[RET]], i32 4) ] +; TUNIT-NEXT: ret ptr [[RET]] +; +; CGSCC: Function Attrs: mustprogress nofree norecurse 
nosync nounwind willreturn memory(inaccessiblemem: write) +; CGSCC-LABEL: define {{[^@]+}}@test2B +; CGSCC-SAME: (i1 noundef [[C:%.*]], ptr nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; CGSCC: A: +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16]] [ "dereferenceable"(ptr [[RET]], i32 4) ] +; CGSCC-NEXT: ret ptr [[RET]] +; CGSCC: B: +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16]] [ "dereferenceable"(ptr [[RET]], i32 4) ] +; CGSCC-NEXT: ret ptr [[RET]] ; br i1 %c, label %A, label %B A: @@ -273,13 +295,21 @@ define ptr @test9(ptr %a, i64 %n) { ; ATTRIBUTOR_OPM: define ptr @test10 ; ATTRIBUTOR_NPM: define nonnull ptr @test10 define ptr @test10(ptr %a, i64 %n) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) -; CHECK-LABEL: define {{[^@]+}}@test10 -; CHECK-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[N]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 noundef [[CMP]]) #[[ATTR16]] -; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[N]] -; CHECK-NEXT: ret ptr [[B]] +; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) +; TUNIT-LABEL: define {{[^@]+}}@test10 +; TUNIT-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: [[CMP:%.*]] = icmp ne i64 [[N]], 0 +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[CMP]]) #[[ATTR15]] +; TUNIT-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[N]] +; TUNIT-NEXT: ret ptr [[B]] +; +; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) +; CGSCC-LABEL: define {{[^@]+}}@test10 +; CGSCC-SAME: (ptr nofree readnone 
"no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: [[CMP:%.*]] = icmp ne i64 [[N]], 0 +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[CMP]]) #[[ATTR16]] +; CGSCC-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[N]] +; CGSCC-NEXT: ret ptr [[B]] ; %cmp = icmp ne i64 %n, 0 call void @llvm.assume(i1 %cmp) @@ -392,50 +422,22 @@ declare nonnull ptr @nonnull() define internal ptr @f1(ptr %arg) { -; FIXME: missing nonnull It should be nonnull @f1(ptr nonnull readonly %arg) -; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) -; TUNIT-LABEL: define {{[^@]+}}@f1 -; TUNIT-SAME: (ptr nofree readonly [[ARG:%.*]]) #[[ATTR6:[0-9]+]] { -; TUNIT-NEXT: bb: -; TUNIT-NEXT: [[TMP:%.*]] = icmp eq ptr [[ARG]], null -; TUNIT-NEXT: br i1 [[TMP]], label [[BB9:%.*]], label [[BB1:%.*]] -; TUNIT: bb1: -; TUNIT-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARG]], align 4 -; TUNIT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 -; TUNIT-NEXT: br i1 [[TMP3]], label [[BB6:%.*]], label [[BB4:%.*]] -; TUNIT: bb4: -; TUNIT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 1 -; TUNIT-NEXT: [[TMP5B:%.*]] = tail call ptr @f3(ptr nofree nonnull readonly [[TMP5]]) #[[ATTR17:[0-9]+]] -; TUNIT-NEXT: [[TMP5C:%.*]] = getelementptr inbounds i32, ptr [[TMP5B]], i64 -1 -; TUNIT-NEXT: br label [[BB9]] -; TUNIT: bb6: -; TUNIT-NEXT: [[TMP7:%.*]] = tail call ptr @f2(ptr nofree nonnull readonly align 4 dereferenceable(4) [[ARG]]) #[[ATTR17]] -; TUNIT-NEXT: ret ptr [[TMP7]] -; TUNIT: bb9: -; TUNIT-NEXT: [[TMP10:%.*]] = phi ptr [ [[TMP5C]], [[BB4]] ], [ inttoptr (i64 4 to ptr), [[BB:%.*]] ] -; TUNIT-NEXT: ret ptr [[TMP10]] -; -; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) +; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f1 -; CGSCC-SAME: (ptr nofree readonly [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { +; CGSCC-SAME: (ptr nofree nonnull readonly align 4 
captures(none) dereferenceable(4) [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: bb: -; CGSCC-NEXT: [[TMP:%.*]] = icmp eq ptr [[ARG]], null -; CGSCC-NEXT: br i1 [[TMP]], label [[BB9:%.*]], label [[BB1:%.*]] +; CGSCC-NEXT: br label [[BB1:%.*]] ; CGSCC: bb1: -; CGSCC-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARG]], align 4 +; CGSCC-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARG]], align 4, !invariant.load [[META0:![0-9]+]] ; CGSCC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CGSCC-NEXT: br i1 [[TMP3]], label [[BB6:%.*]], label [[BB4:%.*]] ; CGSCC: bb4: -; CGSCC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 1 -; CGSCC-NEXT: [[TMP5B:%.*]] = tail call ptr @f3(ptr nofree nonnull readonly [[TMP5]]) #[[ATTR17:[0-9]+]] -; CGSCC-NEXT: [[TMP5C:%.*]] = getelementptr inbounds i32, ptr [[TMP5B]], i64 -1 -; CGSCC-NEXT: br label [[BB9]] +; CGSCC-NEXT: [[TMP5C:%.*]] = getelementptr inbounds i32, ptr undef, i64 -1 +; CGSCC-NEXT: br label [[BB9:%.*]] ; CGSCC: bb6: -; CGSCC-NEXT: [[TMP7:%.*]] = tail call ptr @f2(ptr nofree nonnull readonly align 4 dereferenceable(4) [[ARG]]) #[[ATTR17]] -; CGSCC-NEXT: ret ptr [[TMP7]] +; CGSCC-NEXT: ret ptr undef ; CGSCC: bb9: -; CGSCC-NEXT: [[TMP10:%.*]] = phi ptr [ [[TMP5C]], [[BB4]] ], [ inttoptr (i64 4 to ptr), [[BB:%.*]] ] -; CGSCC-NEXT: ret ptr [[TMP10]] +; CGSCC-NEXT: ret ptr undef ; bb: @@ -463,19 +465,11 @@ bb9: ; preds = %bb4, %bb } define internal ptr @f2(ptr %arg) { -; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) -; TUNIT-LABEL: define {{[^@]+}}@f2 -; TUNIT-SAME: (ptr nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR6]] { -; TUNIT-NEXT: bb: -; TUNIT-NEXT: [[TMP:%.*]] = tail call ptr @f1(ptr nofree readonly [[ARG]]) #[[ATTR17]] -; TUNIT-NEXT: ret ptr [[TMP]] -; -; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) +; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f2 -; CGSCC-SAME: (ptr nofree nonnull 
readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR5]] { +; CGSCC-SAME: (ptr noalias nofree nonnull readnone align 4 captures(none) dereferenceable(4) [[ARG:%.*]]) #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: bb: -; CGSCC-NEXT: [[TMP:%.*]] = tail call ptr @f1(ptr nofree readonly [[ARG]]) #[[ATTR17]] -; CGSCC-NEXT: ret ptr [[TMP]] +; CGSCC-NEXT: ret ptr undef ; bb: %tmp = tail call ptr @f1(ptr %arg) @@ -484,19 +478,17 @@ bb: define dso_local noalias ptr @f3(ptr %arg) { ; FIXME: missing nonnull. It should be nonnull @f3(ptr nonnull readonly %arg) -; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) +; TUNIT: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f3 -; TUNIT-SAME: (ptr nofree readonly [[ARG:%.*]]) #[[ATTR6]] { +; TUNIT-SAME: (ptr nofree readnone captures(none) [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: bb: -; TUNIT-NEXT: [[TMP:%.*]] = call ptr @f1(ptr nofree readonly [[ARG]]) #[[ATTR17]] -; TUNIT-NEXT: ret ptr [[TMP]] +; TUNIT-NEXT: ret ptr undef ; -; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) +; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f3 -; CGSCC-SAME: (ptr nofree readonly [[ARG:%.*]]) #[[ATTR5]] { +; CGSCC-SAME: (ptr nofree readnone captures(none) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: bb: -; CGSCC-NEXT: [[TMP:%.*]] = call ptr @f1(ptr nofree readonly [[ARG]]) #[[ATTR17]] -; CGSCC-NEXT: ret ptr [[TMP]] +; CGSCC-NEXT: ret ptr undef ; bb: ; FIXME: missing nonnull. 
It should be @f1(ptr nonnull readonly %arg) @@ -529,26 +521,26 @@ declare void @fun3(ptr, ptr, ptr) #1 define void @f16(ptr %a, ptr %b, i8 %c) { ; TUNIT: Function Attrs: mustprogress nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@f16 -; TUNIT-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR8:[0-9]+]] { +; TUNIT-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i8 [[C]], 0 ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr nonnull [[B]]) #[[ATTR7:[0-9]+]] +; TUNIT-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr nonnull [[B]]) #[[ATTR6:[0-9]+]] ; TUNIT-NEXT: ret void ; TUNIT: if.else: -; TUNIT-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr [[B]]) #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr [[B]]) #[[ATTR6]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@f16 -; CGSCC-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR7:[0-9]+]] { +; CGSCC-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i8 [[C]], 0 ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr nonnull [[B]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr nonnull [[B]]) #[[ATTR7:[0-9]+]] ; CGSCC-NEXT: ret void ; CGSCC: if.else: -; CGSCC-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr [[B]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun2(ptr nonnull [[A]], ptr [[B]]) #[[ATTR7]] ; CGSCC-NEXT: ret void ; %cmp = icmp eq i8 %c, 0 @@ -571,32 +563,32 @@ define void @f17(ptr %a, i8 %c) { ; ; TUNIT: Function Attrs: mustprogress nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@f17 -; TUNIT-SAME: (ptr nonnull [[A:%.*]], i8 
[[C:%.*]]) #[[ATTR8]] { +; TUNIT-SAME: (ptr nonnull [[A:%.*]], i8 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i8 [[C]], 0 ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: tail call void @fun0() #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun0() #[[ATTR6]] ; TUNIT-NEXT: br label [[CONT:%.*]] ; TUNIT: if.else: -; TUNIT-NEXT: tail call void @fun0() #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun0() #[[ATTR6]] ; TUNIT-NEXT: br label [[CONT]] ; TUNIT: cont: -; TUNIT-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR6]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@f17 -; CGSCC-SAME: (ptr nonnull [[A:%.*]], i8 [[C:%.*]]) #[[ATTR7]] { +; CGSCC-SAME: (ptr nonnull [[A:%.*]], i8 [[C:%.*]]) #[[ATTR8]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i8 [[C]], 0 ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: tail call void @fun0() #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun0() #[[ATTR7]] ; CGSCC-NEXT: br label [[CONT:%.*]] ; CGSCC: if.else: -; CGSCC-NEXT: tail call void @fun0() #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun0() #[[ATTR7]] ; CGSCC-NEXT: br label [[CONT]] ; CGSCC: cont: -; CGSCC-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR7]] ; CGSCC-NEXT: ret void ; %cmp = icmp eq i8 %c, 0 @@ -625,50 +617,50 @@ cont: define void @f18(ptr %a, ptr %b, i8 %c) { ; TUNIT: Function Attrs: mustprogress nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@f18 -; TUNIT-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR8]] { +; TUNIT-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: [[CMP1:%.*]] = icmp eq i8 [[C]], 0 ; TUNIT-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TUNIT: if.then: -; 
TUNIT-NEXT: tail call void @fun0() #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun0() #[[ATTR6]] ; TUNIT-NEXT: br label [[CONT:%.*]] ; TUNIT: if.else: -; TUNIT-NEXT: tail call void @fun0() #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun0() #[[ATTR6]] ; TUNIT-NEXT: br label [[CONT]] ; TUNIT: cont: ; TUNIT-NEXT: [[CMP2:%.*]] = icmp eq i8 [[C]], 1 ; TUNIT-NEXT: br i1 [[CMP2]], label [[CONT_THEN:%.*]], label [[CONT_ELSE:%.*]] ; TUNIT: cont.then: -; TUNIT-NEXT: tail call void @fun1(ptr nonnull [[B]]) #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun1(ptr nonnull [[B]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[CONT2:%.*]] ; TUNIT: cont.else: -; TUNIT-NEXT: tail call void @fun0() #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun0() #[[ATTR6]] ; TUNIT-NEXT: br label [[CONT2]] ; TUNIT: cont2: -; TUNIT-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR7]] +; TUNIT-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR6]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@f18 -; CGSCC-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR7]] { +; CGSCC-SAME: (ptr nonnull [[A:%.*]], ptr [[B:%.*]], i8 [[C:%.*]]) #[[ATTR8]] { ; CGSCC-NEXT: [[CMP1:%.*]] = icmp eq i8 [[C]], 0 ; CGSCC-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: tail call void @fun0() #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun0() #[[ATTR7]] ; CGSCC-NEXT: br label [[CONT:%.*]] ; CGSCC: if.else: -; CGSCC-NEXT: tail call void @fun0() #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun0() #[[ATTR7]] ; CGSCC-NEXT: br label [[CONT]] ; CGSCC: cont: ; CGSCC-NEXT: [[CMP2:%.*]] = icmp eq i8 [[C]], 1 ; CGSCC-NEXT: br i1 [[CMP2]], label [[CONT_THEN:%.*]], label [[CONT_ELSE:%.*]] ; CGSCC: cont.then: -; CGSCC-NEXT: tail call void @fun1(ptr nonnull [[B]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun1(ptr nonnull [[B]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[CONT2:%.*]] ; CGSCC: cont.else: -; CGSCC-NEXT: tail call void 
@fun0() #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun0() #[[ATTR7]] ; CGSCC-NEXT: br label [[CONT2]] ; CGSCC: cont2: -; CGSCC-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @fun1(ptr nonnull [[A]]) #[[ATTR7]] ; CGSCC-NEXT: ret void ; %cmp1 = icmp eq i8 %c, 0 @@ -857,11 +849,17 @@ define i8 @parent6(ptr %a, ptr %b) { ; The nonnull callsite is guaranteed to execute, so the argument must be nonnull throughout the parent. define i8 @parent7(ptr %a) { -; CHECK-LABEL: define {{[^@]+}}@parent7 -; CHECK-SAME: (ptr nonnull [[A:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i8 @use1safecall(ptr nonnull readonly [[A]]) #[[ATTR18:[0-9]+]] -; CHECK-NEXT: call void @use1nonnull(ptr nonnull [[A]]) -; CHECK-NEXT: ret i8 [[RET]] +; TUNIT-LABEL: define {{[^@]+}}@parent7 +; TUNIT-SAME: (ptr nonnull [[A:%.*]]) { +; TUNIT-NEXT: [[RET:%.*]] = call i8 @use1safecall(ptr nonnull readonly [[A]]) #[[ATTR16:[0-9]+]] +; TUNIT-NEXT: call void @use1nonnull(ptr nonnull [[A]]) +; TUNIT-NEXT: ret i8 [[RET]] +; +; CGSCC-LABEL: define {{[^@]+}}@parent7 +; CGSCC-SAME: (ptr nonnull [[A:%.*]]) { +; CGSCC-NEXT: [[RET:%.*]] = call i8 @use1safecall(ptr nonnull readonly [[A]]) #[[ATTR17:[0-9]+]] +; CGSCC-NEXT: call void @use1nonnull(ptr nonnull [[A]]) +; CGSCC-NEXT: ret i8 [[RET]] ; @@ -931,13 +929,13 @@ define ptr @gep1_no_null_opt(ptr %p) #0 { ; Should't be able to derive nonnull based on gep. 
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@gep1_no_null_opt -; TUNIT-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR10:[0-9]+]] { +; TUNIT-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[Q:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 1 ; TUNIT-NEXT: ret ptr [[Q]] ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@gep1_no_null_opt -; CGSCC-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR9:[0-9]+]] { +; CGSCC-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[Q:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 1 ; CGSCC-NEXT: ret ptr [[Q]] ; @@ -983,8 +981,8 @@ define ptr @g1() { ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@g1 -; CGSCC-SAME: () #[[ATTR10:[0-9]+]] { -; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull align 4 ptr @g2() #[[ATTR19:[0-9]+]] +; CGSCC-SAME: () #[[ATTR6]] { +; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull align 4 ptr @g2() #[[ATTR18:[0-9]+]] ; CGSCC-NEXT: ret ptr [[C]] ; %c = call ptr @g2() @@ -1045,21 +1043,32 @@ define internal void @control(ptr dereferenceable(4) %a) { } ; Avoid nonnull as we do not touch naked functions define internal void @naked(ptr dereferenceable(4) %a) naked { -; CHECK: Function Attrs: naked -; CHECK-LABEL: define {{[^@]+}}@naked -; CHECK-SAME: (ptr noundef nonnull dereferenceable(4) [[A:%.*]]) #[[ATTR11:[0-9]+]] { -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: naked +; TUNIT-LABEL: define {{[^@]+}}@naked +; TUNIT-SAME: (ptr noundef nonnull dereferenceable(4) [[A:%.*]]) #[[ATTR10:[0-9]+]] { +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: naked +; CGSCC-LABEL: define 
{{[^@]+}}@naked +; CGSCC-SAME: (ptr noundef nonnull dereferenceable(4) [[A:%.*]]) #[[ATTR11:[0-9]+]] { +; CGSCC-NEXT: ret void ; ret void } ; Avoid nonnull as we do not touch optnone define internal void @optnone(ptr dereferenceable(4) %a) optnone noinline { ; -; CHECK: Function Attrs: noinline optnone -; CHECK-LABEL: define {{[^@]+}}@optnone -; CHECK-SAME: (ptr noundef nonnull dereferenceable(4) [[A:%.*]]) #[[ATTR12:[0-9]+]] { -; CHECK-NEXT: call void @use_i32_ptr(ptr nofree noundef nonnull captures(none) [[A]]) -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: noinline optnone +; TUNIT-LABEL: define {{[^@]+}}@optnone +; TUNIT-SAME: (ptr noundef nonnull dereferenceable(4) [[A:%.*]]) #[[ATTR11:[0-9]+]] { +; TUNIT-NEXT: call void @use_i32_ptr(ptr nofree noundef nonnull captures(none) [[A]]) +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: noinline optnone +; CGSCC-LABEL: define {{[^@]+}}@optnone +; CGSCC-SAME: (ptr noundef nonnull dereferenceable(4) [[A:%.*]]) #[[ATTR12:[0-9]+]] { +; CGSCC-NEXT: call void @use_i32_ptr(ptr nofree noundef nonnull captures(none) [[A]]) +; CGSCC-NEXT: ret void ; call void @use_i32_ptr(ptr %a) ret void @@ -1098,32 +1107,32 @@ define i32 @nonnull_exec_ctx_1(ptr %a, i32 %b) { ; ; TUNIT: Function Attrs: mustprogress nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@nonnull_exec_ctx_1 -; TUNIT-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { +; TUNIT-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: en: ; TUNIT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; TUNIT-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] ; TUNIT: ex: -; TUNIT-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR7]] +; TUNIT-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR6]] ; TUNIT-NEXT: ret i32 [[TMP5]] ; TUNIT: hd: ; TUNIT-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP8:%.*]], [[HD]] ], [ 0, [[EN:%.*]] ] -; TUNIT-NEXT: tail call void @h(ptr [[A]]) #[[ATTR7]] +; TUNIT-NEXT: tail call void @h(ptr [[A]]) #[[ATTR6]] ; 
TUNIT-NEXT: [[TMP8]] = add nuw i32 [[TMP7]], 1 ; TUNIT-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], [[B]] ; TUNIT-NEXT: br i1 [[TMP9]], label [[EX]], label [[HD]] ; ; CGSCC: Function Attrs: mustprogress nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@nonnull_exec_ctx_1 -; CGSCC-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { +; CGSCC-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { ; CGSCC-NEXT: en: ; CGSCC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; CGSCC-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] ; CGSCC: ex: -; CGSCC-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR6]] +; CGSCC-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR7]] ; CGSCC-NEXT: ret i32 [[TMP5]] ; CGSCC: hd: ; CGSCC-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP8:%.*]], [[HD]] ], [ 0, [[EN:%.*]] ] -; CGSCC-NEXT: tail call void @h(ptr [[A]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @h(ptr [[A]]) #[[ATTR7]] ; CGSCC-NEXT: [[TMP8]] = add nuw i32 [[TMP7]], 1 ; CGSCC-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], [[B]] ; CGSCC-NEXT: br i1 [[TMP9]], label [[EX]], label [[HD]] @@ -1148,16 +1157,16 @@ define i32 @nonnull_exec_ctx_1b(ptr %a, i32 %b) { ; ; TUNIT: Function Attrs: mustprogress nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@nonnull_exec_ctx_1b -; TUNIT-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { +; TUNIT-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: en: ; TUNIT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; TUNIT-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] ; TUNIT: ex: -; TUNIT-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR7]] +; TUNIT-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR6]] ; TUNIT-NEXT: ret i32 [[TMP5]] ; TUNIT: hd: ; TUNIT-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP8:%.*]], [[HD2:%.*]] ], [ 0, [[EN:%.*]] ] -; TUNIT-NEXT: tail call void @h(ptr [[A]]) #[[ATTR7]] +; TUNIT-NEXT: tail call void @h(ptr [[A]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[HD2]] ; TUNIT: hd2: ; TUNIT-NEXT: [[TMP8]] 
= add nuw i32 [[TMP7]], 1 @@ -1166,16 +1175,16 @@ define i32 @nonnull_exec_ctx_1b(ptr %a, i32 %b) { ; ; CGSCC: Function Attrs: mustprogress nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@nonnull_exec_ctx_1b -; CGSCC-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { +; CGSCC-SAME: (ptr [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { ; CGSCC-NEXT: en: ; CGSCC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; CGSCC-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] ; CGSCC: ex: -; CGSCC-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR6]] +; CGSCC-NEXT: [[TMP5:%.*]] = tail call i32 @g(ptr nonnull [[A]]) #[[ATTR7]] ; CGSCC-NEXT: ret i32 [[TMP5]] ; CGSCC: hd: ; CGSCC-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP8:%.*]], [[HD2:%.*]] ], [ 0, [[EN:%.*]] ] -; CGSCC-NEXT: tail call void @h(ptr [[A]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @h(ptr [[A]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[HD2]] ; CGSCC: hd2: ; CGSCC-NEXT: [[TMP8]] = add nuw i32 [[TMP7]], 1 @@ -1205,7 +1214,7 @@ define i32 @nonnull_exec_ctx_2(ptr %a, i32 %b) willreturn nounwind { ; ; TUNIT: Function Attrs: mustprogress nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@nonnull_exec_ctx_2 -; TUNIT-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { +; TUNIT-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: en: ; TUNIT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; TUNIT-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] @@ -1221,7 +1230,7 @@ define i32 @nonnull_exec_ctx_2(ptr %a, i32 %b) willreturn nounwind { ; ; CGSCC: Function Attrs: mustprogress nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@nonnull_exec_ctx_2 -; CGSCC-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { +; CGSCC-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { ; CGSCC-NEXT: en: ; CGSCC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; CGSCC-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] @@ -1255,7 +1264,7 @@ define i32 @nonnull_exec_ctx_2b(ptr %a, i32 %b) willreturn nounwind { ; ; 
TUNIT: Function Attrs: mustprogress nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@nonnull_exec_ctx_2b -; TUNIT-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { +; TUNIT-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: en: ; TUNIT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; TUNIT-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] @@ -1273,7 +1282,7 @@ define i32 @nonnull_exec_ctx_2b(ptr %a, i32 %b) willreturn nounwind { ; ; CGSCC: Function Attrs: mustprogress nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@nonnull_exec_ctx_2b -; CGSCC-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR7]] { +; CGSCC-SAME: (ptr nonnull [[A:%.*]], i32 [[B:%.*]]) #[[ATTR8]] { ; CGSCC-NEXT: en: ; CGSCC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[B]], 0 ; CGSCC-NEXT: br i1 [[TMP3]], label [[EX:%.*]], label [[HD:%.*]] @@ -1392,8 +1401,8 @@ declare ptr @strrchr(ptr %0, i32 %1) nofree nounwind readonly willreturn define ptr @mybasename(ptr nofree readonly %str) { ; TUNIT: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@mybasename -; TUNIT-SAME: (ptr nofree readonly [[STR:%.*]]) #[[ATTR14:[0-9]+]] { -; TUNIT-NEXT: [[CALL:%.*]] = call ptr @strrchr(ptr nofree readonly [[STR]], i32 noundef 47) #[[ATTR19:[0-9]+]] +; TUNIT-SAME: (ptr nofree readonly [[STR:%.*]]) #[[ATTR13:[0-9]+]] { +; TUNIT-NEXT: [[CALL:%.*]] = call ptr @strrchr(ptr nofree readonly [[STR]], i32 noundef 47) #[[ATTR17:[0-9]+]] ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[CALL]], null ; TUNIT-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 1 ; TUNIT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], ptr [[ADD_PTR]], ptr [[STR]] @@ -1402,7 +1411,7 @@ define ptr @mybasename(ptr nofree readonly %str) { ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@mybasename ; CGSCC-SAME: (ptr nofree readonly [[STR:%.*]]) #[[ATTR14:[0-9]+]] { -; CGSCC-NEXT: [[CALL:%.*]] 
= call ptr @strrchr(ptr nofree readonly [[STR]], i32 noundef 47) #[[ATTR20:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call ptr @strrchr(ptr nofree readonly [[STR]], i32 noundef 47) #[[ATTR19:[0-9]+]] ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[CALL]], null ; CGSCC-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 1 ; CGSCC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], ptr [[ADD_PTR]], ptr [[STR]] @@ -1425,7 +1434,7 @@ define void @nonnull_assume_pos(ptr %arg) { ; ; TUNIT-LABEL: define {{[^@]+}}@nonnull_assume_pos ; TUNIT-SAME: (ptr nofree nonnull readnone captures(none) [[ARG:%.*]]) { -; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR16]] [ "nonnull"(ptr [[ARG]]) ] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR15]] [ "nonnull"(ptr [[ARG]]) ] ; TUNIT-NEXT: call void @use_i8_ptr(ptr noalias nofree nonnull readnone captures(none) [[ARG]]) #[[ATTR5]] ; TUNIT-NEXT: [[TMP1:%.*]] = call ptr @unknown() ; TUNIT-NEXT: ret void @@ -1554,14 +1563,14 @@ define void @phi_caller(ptr %p) { ; TUNIT: Function Attrs: nounwind ; TUNIT-LABEL: define {{[^@]+}}@phi_caller ; TUNIT-SAME: (ptr nofree [[P:%.*]]) #[[ATTR5]] { -; TUNIT-NEXT: [[C:%.*]] = call nonnull ptr @phi(ptr noalias nofree readnone [[P]]) #[[ATTR20:[0-9]+]] +; TUNIT-NEXT: [[C:%.*]] = call nonnull ptr @phi(ptr noalias nofree readnone [[P]]) #[[ATTR18:[0-9]+]] ; TUNIT-NEXT: call void @use_i8_ptr(ptr noalias nofree nonnull readnone captures(none) [[C]]) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nounwind ; CGSCC-LABEL: define {{[^@]+}}@phi_caller ; CGSCC-SAME: (ptr nofree [[P:%.*]]) #[[ATTR4]] { -; CGSCC-NEXT: [[C:%.*]] = call nonnull ptr @phi(ptr noalias nofree readnone [[P]]) #[[ATTR21:[0-9]+]] +; CGSCC-NEXT: [[C:%.*]] = call nonnull ptr @phi(ptr noalias nofree readnone [[P]]) #[[ATTR20:[0-9]+]] ; CGSCC-NEXT: call void @use_i8_ptr(ptr noalias nofree nonnull readnone captures(none) [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret void ; @@ -1594,14 +1603,14 @@ define void 
@multi_ret_caller(ptr %p) { ; TUNIT: Function Attrs: nounwind ; TUNIT-LABEL: define {{[^@]+}}@multi_ret_caller ; TUNIT-SAME: (ptr nofree [[P:%.*]]) #[[ATTR5]] { -; TUNIT-NEXT: [[C:%.*]] = call nonnull ptr @multi_ret(ptr noalias nofree readnone [[P]]) #[[ATTR20]] +; TUNIT-NEXT: [[C:%.*]] = call nonnull ptr @multi_ret(ptr noalias nofree readnone [[P]]) #[[ATTR18]] ; TUNIT-NEXT: call void @use_i8_ptr(ptr noalias nofree nonnull readnone captures(none) [[C]]) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nounwind ; CGSCC-LABEL: define {{[^@]+}}@multi_ret_caller ; CGSCC-SAME: (ptr nofree [[P:%.*]]) #[[ATTR4]] { -; CGSCC-NEXT: [[C:%.*]] = call nonnull ptr @multi_ret(ptr noalias nofree readnone [[P]]) #[[ATTR21]] +; CGSCC-NEXT: [[C:%.*]] = call nonnull ptr @multi_ret(ptr noalias nofree readnone [[P]]) #[[ATTR20]] ; CGSCC-NEXT: call void @use_i8_ptr(ptr noalias nofree nonnull readnone captures(none) [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret void ; @@ -1613,18 +1622,31 @@ define void @multi_ret_caller(ptr %p) { ; From https://github.com/llvm/llvm-project/pull/85810 @G = internal global i64 1, align 8 define dso_local ptr @update_global_in_alive_bb() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn -; CHECK-LABEL: define {{[^@]+}}@update_global_in_alive_bb -; CHECK-SAME: () #[[ATTR15:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @G, align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: store i64 0, ptr @G, align 8 -; CHECK-NEXT: ret ptr inttoptr (i64 5 to ptr) -; CHECK: if.else: -; CHECK-NEXT: ret ptr null +; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@update_global_in_alive_bb +; TUNIT-SAME: () #[[ATTR14:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[TMP0:%.*]] = load i64, ptr @G, align 8 +; TUNIT-NEXT: [[CMP:%.*]] 
= icmp ne i64 [[TMP0]], 0 +; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; TUNIT: if.then: +; TUNIT-NEXT: store i64 0, ptr @G, align 8 +; TUNIT-NEXT: ret ptr inttoptr (i64 5 to ptr) +; TUNIT: if.else: +; TUNIT-NEXT: ret ptr null +; +; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@update_global_in_alive_bb +; CGSCC-SAME: () #[[ATTR15:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[TMP0:%.*]] = load i64, ptr @G, align 8 +; CGSCC-NEXT: [[CMP:%.*]] = icmp ne i64 [[TMP0]], 0 +; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CGSCC: if.then: +; CGSCC-NEXT: store i64 0, ptr @G, align 8 +; CGSCC-NEXT: ret ptr inttoptr (i64 5 to ptr) +; CGSCC: if.else: +; CGSCC-NEXT: ret ptr null ; entry: %0 = load i64, ptr @G, align 8 @@ -1640,48 +1662,47 @@ if.else: attributes #0 = { null_pointer_is_valid } attributes #1 = { nounwind willreturn} ;. -; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } -; TUNIT: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } -; TUNIT: attributes #[[ATTR2]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) } -; TUNIT: attributes #[[ATTR3]] = { mustprogress nofree nosync nounwind willreturn memory(none) } -; TUNIT: attributes #[[ATTR4]] = { noreturn } -; TUNIT: attributes #[[ATTR5]] = { nounwind } -; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind memory(argmem: read) } -; TUNIT: attributes #[[ATTR7]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR8]] = { mustprogress nounwind willreturn } -; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nounwind willreturn memory(read) } -; TUNIT: attributes #[[ATTR10]] = { mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } -; TUNIT: attributes #[[ATTR11]] = { naked } -; TUNIT: attributes 
#[[ATTR12]] = { noinline optnone } -; TUNIT: attributes #[[ATTR13:[0-9]+]] = { nofree nounwind willreturn memory(read) } -; TUNIT: attributes #[[ATTR14]] = { mustprogress nofree nosync nounwind willreturn memory(read) } -; TUNIT: attributes #[[ATTR15]] = { mustprogress nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR16]] = { nofree willreturn memory(write) } -; TUNIT: attributes #[[ATTR17]] = { nofree nosync nounwind memory(read) } -; TUNIT: attributes #[[ATTR18]] = { nosync willreturn memory(read) } -; TUNIT: attributes #[[ATTR19]] = { nofree nosync willreturn memory(read) } -; TUNIT: attributes #[[ATTR20]] = { nofree nosync nounwind willreturn memory(none) } -;. ; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } ; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) } ; CGSCC: attributes #[[ATTR3]] = { noreturn } ; CGSCC: attributes #[[ATTR4]] = { nounwind } -; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind memory(argmem: read) } -; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR7]] = { mustprogress nounwind willreturn } -; CGSCC: attributes #[[ATTR8:[0-9]+]] = { nounwind willreturn memory(read) } -; CGSCC: attributes #[[ATTR9]] = { mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } -; CGSCC: attributes #[[ATTR10]] = { mustprogress nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR5]] = { mustprogress nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR6]] = { mustprogress nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR8]] = { mustprogress nounwind willreturn } +; 
CGSCC: attributes #[[ATTR9:[0-9]+]] = { nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR10]] = { mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } ; CGSCC: attributes #[[ATTR11]] = { naked } ; CGSCC: attributes #[[ATTR12]] = { noinline optnone } ; CGSCC: attributes #[[ATTR13:[0-9]+]] = { nofree nounwind willreturn memory(read) } ; CGSCC: attributes #[[ATTR14]] = { mustprogress nofree nosync nounwind willreturn memory(read) } ; CGSCC: attributes #[[ATTR15]] = { mustprogress nofree norecurse nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR16]] = { nofree willreturn memory(write) } -; CGSCC: attributes #[[ATTR17]] = { nofree nosync nounwind memory(read) } -; CGSCC: attributes #[[ATTR18]] = { nosync willreturn memory(read) } -; CGSCC: attributes #[[ATTR19]] = { nofree nosync willreturn } -; CGSCC: attributes #[[ATTR20]] = { nofree nosync willreturn memory(read) } -; CGSCC: attributes #[[ATTR21]] = { nofree willreturn } +; CGSCC: attributes #[[ATTR17]] = { nosync willreturn memory(read) } +; CGSCC: attributes #[[ATTR18]] = { nofree nosync willreturn } +; CGSCC: attributes #[[ATTR19]] = { nofree nosync willreturn memory(read) } +; CGSCC: attributes #[[ATTR20]] = { nofree willreturn } +;. 
+; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +; TUNIT: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: write) } +; TUNIT: attributes #[[ATTR3]] = { mustprogress nofree nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR4]] = { noreturn } +; TUNIT: attributes #[[ATTR5]] = { nounwind } +; TUNIT: attributes #[[ATTR6]] = { nounwind willreturn } +; TUNIT: attributes #[[ATTR7]] = { mustprogress nounwind willreturn } +; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR9]] = { mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; TUNIT: attributes #[[ATTR10]] = { naked } +; TUNIT: attributes #[[ATTR11]] = { noinline optnone } +; TUNIT: attributes #[[ATTR12:[0-9]+]] = { nofree nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR13]] = { mustprogress nofree nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR14]] = { mustprogress nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR15]] = { nofree willreturn memory(write) } +; TUNIT: attributes #[[ATTR16]] = { nosync willreturn memory(read) } +; TUNIT: attributes #[[ATTR17]] = { nofree nosync willreturn memory(read) } +; TUNIT: attributes #[[ATTR18]] = { nofree nosync nounwind willreturn memory(none) } +;. +; CGSCC: [[META0]] = !{} ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index 3e07fe42261e9..2235f194af8ea 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -1267,7 +1267,7 @@ entry: define void @noalias_arg_simplifiable_2(ptr %Bytes) { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn ; TUNIT-LABEL: define void @noalias_arg_simplifiable_2( -; TUNIT-SAME: ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] { +; TUNIT-SAME: ptr nofree nonnull captures(none) dereferenceable(24) [[BYTES:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[ENTRY:.*]]: ; TUNIT-NEXT: br label %[[FOR_COND:.*]] ; TUNIT: [[FOR_COND]]: @@ -1344,7 +1344,7 @@ define void @noalias_arg_simplifiable_2(ptr %Bytes) { ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn ; CGSCC-LABEL: define void @noalias_arg_simplifiable_2( -; CGSCC-SAME: ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] { +; CGSCC-SAME: ptr nofree nonnull align 4 captures(none) dereferenceable(1024) [[BYTES:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[ENTRY:.*]]: ; CGSCC-NEXT: br label %[[FOR_COND:.*]] ; CGSCC: [[FOR_COND]]: @@ -1399,7 +1399,7 @@ define void @noalias_arg_simplifiable_2(ptr %Bytes) { ; CGSCC-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 1023 ; CGSCC-NEXT: store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[CHAR_TBAA15]] ; CGSCC-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 500 -; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR21]] +; CGSCC-NEXT: call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR21]] ; CGSCC-NEXT: br label %[[FOR_COND27:.*]] ; CGSCC: [[FOR_COND27]]: ; CGSCC-NEXT: 
[[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC35:.*]] ], [ 0, %[[FOR_END23]] ] diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll index d65480b05759a..543f33ee0621b 100644 --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -238,7 +238,7 @@ define void @only_exit() local_unnamed_addr #0 { define void @conditional_exit(i32 %0, ptr nocapture readonly %1) local_unnamed_addr #0 { ; CHECK: Function Attrs: noinline nounwind uwtable ; CHECK-LABEL: define {{[^@]+}}@conditional_exit -; CHECK-SAME: (i32 [[TMP0:%.*]], ptr nofree readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +; CHECK-SAME: (i32 [[TMP0:%.*]], ptr nofree nonnull readonly align 4 captures(none) dereferenceable(4) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; CHECK: 4: diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 9d5ae1606f2e3..e06fb1cfd9656 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -360,7 +360,6 @@ declare nonnull ptr @nonnull() define internal ptr @f1(ptr %arg) { -; FIXME: missing nonnull It should be nonnull @f1(ptr nonnull readonly %arg) ; FNATTRS-LABEL: define internal nonnull ptr @f1( ; FNATTRS-SAME: ptr readonly captures(address_is_null) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { ; FNATTRS-NEXT: bb: @@ -383,7 +382,7 @@ define internal ptr @f1(ptr %arg) { ; FNATTRS-NEXT: ret ptr [[TMP10]] ; ; ATTRIBUTOR-LABEL: define internal ptr @f1( -; ATTRIBUTOR-SAME: ptr nofree readonly [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; ATTRIBUTOR-SAME: ptr nofree nonnull readonly [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { ; ATTRIBUTOR-NEXT: bb: ; ATTRIBUTOR-NEXT: [[TMP:%.*]] = icmp eq ptr [[ARG]], null ; ATTRIBUTOR-NEXT: br 
i1 [[TMP]], label [[BB9:%.*]], label [[BB1:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 3b016f8d0a9ff..63348ccf94f78 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -44,7 +44,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP16]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP16]], i1 false) ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP20]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -125,7 +125,7 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -187,7 +187,7 @@ define i64 @loop_contains_safe_call() #1 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 
[[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -256,7 +256,7 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[INDEX1]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP16]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -336,7 +336,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -483,12 +483,12 @@ exit: define i64 @same_exit_block_requires_interleaving() { ; CHECK-LABEL: define i64 @same_exit_block_requires_interleaving() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [128 x %my.struct], align 8 +; CHECK-NEXT: [[P1:%.*]] = alloca [128 x [[MY_STRUCT:%.*]]], align 8 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 256) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x %my.struct], ptr [[P1]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [128 x [[MY_STRUCT]]], ptr [[P1]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 ; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_LATCH]], label [[LOOP_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index b40a184a3e425..c56f8327a48b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -79,20 +79,20 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK: vector.early.exit: ; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP39]], 16 -; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP59]], i1 true) +; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP59]], i1 false) ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP40]], 3 ; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[TMP41]] -; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 true) +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 false) ; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], [[TMP44]] ; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP44]], [[TMP40]] ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 [[TMP46]], i64 [[TMP43]] -; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP30]], i1 true) +; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP30]], i1 false) ; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP40]], 1 ; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = icmp ne i64 [[TMP49]], [[TMP40]] ; 
CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], i64 [[TMP51]], i64 [[TMP48]] -; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 false) ; CHECK-NEXT: [[TMP55:%.*]] = mul i64 [[TMP40]], 0 ; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP55]], [[TMP61]] ; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i64 [[TMP61]], [[TMP40]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll index 794e274a2628c..f11f35319b8fc 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -31,9 +31,9 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2: [[MIDDLE_BLOCK]]: ; VF4IC2-NEXT: br label %[[RETURN:.*]] ; VF4IC2: [[VECTOR_EARLY_EXIT]]: -; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; VF4IC2-NEXT: [[TMP10:%.*]] = add i64 4, [[TMP9]] -; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; VF4IC2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]] ; VF4IC2-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP11]], 4 ; VF4IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[TMP10]] @@ -64,7 +64,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1: [[MIDDLE_BLOCK]]: ; VF8IC1-NEXT: br label %[[RETURN:.*]] ; VF8IC1: [[VECTOR_EARLY_EXIT]]: -; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true) +; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false) ; 
VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 0, [[TMP6]] ; VF8IC1-NEXT: br label %[[RETURN]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index 03b7ed7fe2135..0bc2748b6252d 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -28,7 +28,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[LOOP_END:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: br label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: @@ -140,7 +140,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero(ptr n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[LOOP_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: br label %[[LOOP_END_LOOPEXIT]] ; CHECK: [[SCALAR_PH]]: @@ -336,7 +336,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP10:%.*]] = call i64 
@llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]] @@ -431,7 +431,7 @@ define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %fi ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] @@ -525,7 +525,7 @@ define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] @@ -602,7 +602,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[LOOP_END:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> 
[[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: br label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: @@ -740,7 +740,7 @@ define i64 @find_if_pointer_distance_deref_via_assumption(ptr %vec) nofree nosyn ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[BEGIN]], i64 [[TMP13]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index ed5dcc78eeb78..053863117bdc8 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -124,17 +124,17 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 
[[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -211,17 +211,17 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 true) +; VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 false) ; VF4IC4-NEXT: [[TMP16:%.*]] = add i64 12, [[TMP15]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP28]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP28]], i1 false) ; VF4IC4-NEXT: [[TMP18:%.*]] = add i64 8, [[TMP30]] ; VF4IC4-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 [[TMP16]] -; VF4IC4-NEXT: [[TMP21:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP14]], i1 true) +; VF4IC4-NEXT: [[TMP21:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP14]], i1 false) ; VF4IC4-NEXT: [[TMP22:%.*]] = add 
i64 4, [[TMP21]] ; VF4IC4-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP21]], 4 ; VF4IC4-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i64 [[TMP22]], i64 [[TMP20]] -; VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; VF4IC4-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]] ; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP25]], 4 ; VF4IC4-NEXT: [[TMP6:%.*]] = select i1 [[TMP27]], i64 [[TMP26]], i64 [[TMP24]] @@ -304,17 +304,17 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> 
[[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -401,17 +401,17 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -503,17 +503,17 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: 
vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -623,17 +623,17 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP28:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP43]], i1 true) +; VF4IC4-NEXT: [[TMP28:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP43]], i1 false) ; VF4IC4-NEXT: [[TMP29:%.*]] = add i64 12, [[TMP28]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 
@llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 8, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] -; VF4IC4-NEXT: [[TMP34:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; VF4IC4-NEXT: [[TMP34:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; VF4IC4-NEXT: [[TMP35:%.*]] = add i64 4, [[TMP34]] ; VF4IC4-NEXT: [[TMP36:%.*]] = icmp ne i64 [[TMP34]], 4 ; VF4IC4-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i64 [[TMP35]], i64 [[TMP33]] -; VF4IC4-NEXT: [[TMP38:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) +; VF4IC4-NEXT: [[TMP38:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 false) ; VF4IC4-NEXT: [[TMP39:%.*]] = add i64 0, [[TMP38]] ; VF4IC4-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP38]], 4 ; VF4IC4-NEXT: [[TMP10:%.*]] = select i1 [[TMP40]], i64 [[TMP39]], i64 [[TMP37]] @@ -734,17 +734,17 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; VF4IC4-NEXT: [[TMP20:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE8]] ; VF4IC4-NEXT: [[TMP22:%.*]] = icmp ne i64 
[[FIRST_ACTIVE_LANE8]], 4 ; VF4IC4-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i64 [[TMP21]], i64 [[TMP20]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 false) ; VF4IC4-NEXT: [[TMP24:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE9]] ; VF4IC4-NEXT: [[TMP25:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE9]], 4 ; VF4IC4-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i64 [[TMP24]], i64 [[TMP23]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE1]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP26]] @@ -861,17 +861,17 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP37]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP37]], i1 false) ; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 false) ; VF4IC4-NEXT: [[TMP29:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE15]] ; VF4IC4-NEXT: [[TMP30:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE15]], 4 ; VF4IC4-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i64 [[TMP29]], i64 [[TMP28]] -; VF4IC4-NEXT: 
[[FIRST_ACTIVE_LANE16:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE16:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE16]] ; VF4IC4-NEXT: [[TMP33:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE16]], 4 ; VF4IC4-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i64 [[TMP32]], i64 [[TMP31]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 false) ; VF4IC4-NEXT: [[TMP35:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: [[TMP36:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE1]], 4 ; VF4IC4-NEXT: [[TMP10:%.*]] = select i1 [[TMP36]], i64 [[TMP35]], i64 [[TMP34]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 4fd8d17073de4..ae03f2426a800 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -424,7 +424,7 @@ define i64 @loop_guard_needed_to_prove_dereferenceable(i32 %x, i1 %cmp2) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: scalar.ph: @@ -572,7 +572,7 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] ; CHECK-NEXT: br i1 [[CMP_N]], label 
[[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 79821b8be1734..55682bc410527 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -32,7 +32,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -96,7 +96,7 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP10]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[DOTCAST]], 2 @@ -160,7 +160,7 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() { ; CHECK: middle.block: ; 
CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[FIRST_ACTIVE_LANE]] to i128 ; CHECK-NEXT: [[TMP9:%.*]] = add i128 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i128 [[TMP9]] to i32 @@ -226,7 +226,7 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[TMP10]] to float ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]] @@ -294,7 +294,7 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 5 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP20]] @@ -357,7 +357,7 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) 
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -420,7 +420,7 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -485,7 +485,7 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -549,7 +549,7 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 
3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -674,7 +674,7 @@ define i64 @same_exit_block_pre_inc_use3() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -739,7 +739,7 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -801,7 +801,7 @@ define i64 @same_exit_block_post_inc_use() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -861,7 +861,7 @@ define ptr @same_exit_block_post_inc_use1_ivptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> 
[[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP9]] @@ -922,7 +922,7 @@ define i64 @same_exit_block_post_inc_use2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] @@ -987,7 +987,7 @@ define i64 @diff_exit_block_pre_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1122,7 +1122,7 @@ define i64 @diff_exit_block_pre_inc_use3() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add 
i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1189,7 +1189,7 @@ define i64 @diff_exit_block_post_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1258,7 +1258,7 @@ define i64 @diff_exit_block_post_inc_use2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 3, [[TMP11]] @@ -1330,7 +1330,7 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) { ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 [[START]], [[TMP12]] @@ -1401,7 +1401,7 @@ define i64 @loop_contains_safe_call() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: 
[[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1463,7 +1463,7 @@ define i64 @loop_contains_safe_div() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1526,7 +1526,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1594,7 +1594,7 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 
[[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = sub i64 1023, [[TMP12]] ; CHECK-NEXT: br label [[LOOP_END:%.*]] @@ -1719,7 +1719,7 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index 8da1dca52e87b..ef4d5c6d66700 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -127,7 +127,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF1: [[MIDDLE_BLOCK]]: ; VF8UF1-NEXT: br label %[[EXIT:.*]] ; VF8UF1: [[VECTOR_EARLY_EXIT]]: -; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true) +; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 false) ; VF8UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; VF8UF1-NEXT: br label %[[EXIT]] ; VF8UF1: [[EXIT]]: @@ -156,9 +156,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: -; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 
true) +; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false) ; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 8, [[TMP5]] -; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 true) +; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 false) ; VF8UF2-NEXT: [[TMP9:%.*]] = add i64 0, [[TMP8]] ; VF8UF2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP8]], 8 ; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]] @@ -185,7 +185,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[VECTOR_EARLY_EXIT]]: -; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true) +; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 false) ; VF16UF1-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] ; VF16UF1-NEXT: br label %[[EXIT]] ; VF16UF1: [[EXIT]]: diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index aea9a80ba6dd0..a727973b43511 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -28,7 +28,7 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16 ; CHECK: [[MIDDLE_SPLIT]]: ; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[RETURN:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 false) ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: 
[[TMP8:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 [[TMP7]] @@ -149,13 +149,14 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_HEADER_I_PREHEADER2:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], -8 -; CHECK: [[TMP9:%.*]] = getelementptr -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[XTRAITER]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[PROL_ITER_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <8 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD_FR]], splat (i16 1) ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -170,10 +171,10 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I_PREHEADER2]] ; CHECK: [[LOOP_HEADER_I_PREHEADER2]]: -; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[NEXT_GEP]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER_I:.*]] ; 
CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu-templates.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu-templates.s new file mode 100644 index 0000000000000..d7afebe6e5e55 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu-templates.s @@ -0,0 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +// INSTS= +// v_ceil_f32 OPS32 +// v_cos_f32 OPS32 +// +// SRC32= +// v1 # A comment. +// 0.5 +// +// OPS32= +// v5, SRC32 +// v255, 0xaf123456 + +v_bfrev_b32 v5, v1 +// GFX11: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu-templates.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu-templates.s.expected new file mode 100644 index 0000000000000..21ee43a8a06a3 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu-templates.s.expected @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +// INSTS= +// v_ceil_f32 OPS32 +// v_cos_f32 OPS32 +// +// SRC32= +// v1 # A comment. 
+// 0.5 +// +// OPS32= +// v5, SRC32 +// v255, 0xaf123456 + +v_ceil_f32 v5, v1 +// GFX11: v_ceil_f32_e32 v5, v1 ; encoding: [0x01,0x45,0x0a,0x7e] + +v_ceil_f32 v5, 0.5 +// GFX11: v_ceil_f32_e32 v5, 0.5 ; encoding: [0xf0,0x44,0x0a,0x7e] + +v_ceil_f32 v255, 0xaf123456 +// GFX11: v_ceil_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x44,0xfe,0x7f,0x56,0x34,0x12,0xaf] + +v_cos_f32 v5, v1 +// GFX11: v_cos_f32_e32 v5, v1 ; encoding: [0x01,0x6d,0x0a,0x7e] + +v_cos_f32 v5, 0.5 +// GFX11: v_cos_f32_e32 v5, 0.5 ; encoding: [0xf0,0x6c,0x0a,0x7e] + +v_cos_f32 v255, 0xaf123456 +// GFX11: v_cos_f32_e32 v255, 0xaf123456 ; encoding: [0xff,0x6c,0xfe,0x7f,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-templates.test b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-templates.test new file mode 100644 index 0000000000000..6dfdb985d8cdb --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-templates.test @@ -0,0 +1,5 @@ +# REQUIRES: amdgpu-registered-target +## Test expanding instruction templates. 
+ +# RUN: cp -f %S/Inputs/amdgpu-templates.s %t.s && %update_mc_test_checks %t.s +# RUN: diff -u %S/Inputs//amdgpu-templates.s.expected %t.s diff --git a/llvm/utils/lit/examples/many-tests/ManyTests.py b/llvm/utils/lit/examples/many-tests/ManyTests.py index 89e818a037c39..ffdbbad5a77b1 100644 --- a/llvm/utils/lit/examples/many-tests/ManyTests.py +++ b/llvm/utils/lit/examples/many-tests/ManyTests.py @@ -1,4 +1,5 @@ -from lit import Test, TestFormat +from lit import Test +from lit.formats import TestFormat class ManyTests(TestFormat): diff --git a/llvm/utils/update_mc_test_checks.py b/llvm/utils/update_mc_test_checks.py index 363278d1b1f97..9b80267e8ad8c 100755 --- a/llvm/utils/update_mc_test_checks.py +++ b/llvm/utils/update_mc_test_checks.py @@ -29,6 +29,11 @@ ] +class Error(Exception): + def __init__(self, test_info, line_no, msg): + super().__init__(f"{test_info.path}:{line_no}: {msg}") + + def invoke_tool(exe, check_rc, cmd_args, testline, verbose=False): substs = SUBSTITUTIONS + [(t, exe) for t in mc_LIKE_TOOLS] args = [common.applySubstitutions(cmd, substs) for cmd in cmd_args.split("|")] @@ -125,6 +130,62 @@ def getErrCheckLine(prefix, output, mc_mode, line_offset=1): ) +def parse_token_defs(test_info): + tokens = {} + current_token = None + for line_no, line in enumerate(test_info.input_lines, start=1): + # Remove comments. + line = line.split("#")[0].rstrip() + + # Skip everything up to the instructions definition. + if not tokens and not current_token and line != "// INSTS=": + continue + + if not line.startswith("//"): + break + + original_len = len(line) + line = line[2:].lstrip(" ") + indent = original_len - len(line) + + if not line: + current_token = None + continue + + # Define a new token. 
+ if not current_token: + if indent != 4 or not line.endswith("="): + raise Error(test_info, line_no, "token definition expected") + + current_token = line[:-1].strip() + if current_token in tokens: + raise Error(test_info, line_no, f"'{current_token}' redefined") + + tokens[current_token] = [] + continue + + # Add token value. + if indent != 8: + raise Error(test_info, line_no, "wrong indentation for token value") + + tokens[current_token].append(line) + + return tokens + + +def expand_insts(tokens): + def subst(s): + for token, values in tokens.items(): + if token in s: + for value in values: + yield from subst(s.replace(token, value, 1)) + return + + yield s + + yield from subst("INSTS") + + def update_test(ti: common.TestInfo): if ti.path.endswith(".s"): mc_mode = "asm" @@ -209,6 +270,14 @@ def update_test(ti: common.TestInfo): testlines = list(dict.fromkeys(testlines)) common.debug("Valid test line found: ", len(testlines)) + # Where instruction templates are specified, use them instead. + use_asm_templates = False + if mc_mode == "asm": + tokens = parse_token_defs(ti) + if "INSTS" in tokens: + testlines = list(expand_insts(tokens)) + use_asm_templates = True + raw_output = [] raw_prefixes = [] for ( @@ -244,7 +313,6 @@ def update_test(ti: common.TestInfo): raw_prefixes.append(prefixes) - output_lines = [] generated_prefixes = {} sort_keys = {} used_prefixes = set() @@ -321,14 +389,32 @@ def update_test(ti: common.TestInfo): generated_prefixes[input_line] = "\n".join(check_lines) # write output - for input_info in ti.iterlines(output_lines): - input_line = input_info.line - if input_line in testlines: - output_lines.append(input_line) - output_lines.append(generated_prefixes[input_line]) - - elif should_add_line_to_output(input_line, prefix_set, mc_mode): - output_lines.append(input_line) + output_lines = [] + if use_asm_templates: + # Keep all leading comments and empty lines. 
+ for input_info in ti.iterlines(output_lines): + input_line = input_info.line + if not input_line or input_line.startswith(COMMENT[mc_mode]): + output_lines.append(input_line) + continue + break + + # Remove tail empty lines. + while not output_lines[-1]: + del output_lines[-1] + + # Emit test and check lines. + for input_line in testlines: + output_lines.extend(["", input_line, generated_prefixes[input_line]]) + else: + for input_info in ti.iterlines(output_lines): + input_line = input_info.line + if input_line in testlines: + output_lines.append(input_line) + output_lines.append(generated_prefixes[input_line]) + + elif should_add_line_to_output(input_line, prefix_set, mc_mode): + output_lines.append(input_line) if ti.args.unique or ti.args.sort: # split with double newlines diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp index 214410f78e51c..3667fdb2bb728 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp @@ -347,28 +347,55 @@ LogicalResult LoadOpOfExpandShapeOpFolder::matchAndRewrite( loadOp.getLoc(), rewriter, expandShapeOp, indices, sourceIndices, isa(loadOp.getOperation())))) return failure(); - llvm::TypeSwitch(loadOp) + + return llvm::TypeSwitch(loadOp) .Case([&](affine::AffineLoadOp op) { rewriter.replaceOpWithNewOp( loadOp, expandShapeOp.getViewSource(), sourceIndices); + return success(); }) .Case([&](memref::LoadOp op) { rewriter.replaceOpWithNewOp( loadOp, expandShapeOp.getViewSource(), sourceIndices, op.getNontemporal()); + return success(); }) .Case([&](vector::LoadOp op) { rewriter.replaceOpWithNewOp( op, op.getType(), expandShapeOp.getViewSource(), sourceIndices, op.getNontemporal()); + return success(); }) .Case([&](vector::MaskedLoadOp op) { rewriter.replaceOpWithNewOp( op, op.getType(), expandShapeOp.getViewSource(), sourceIndices, op.getMask(), op.getPassThru()); + return 
success(); + }) + .Case([&](vector::TransferReadOp op) { + // We only support minor identity maps in the permutation attribute. + if (!op.getPermutationMap().isMinorIdentity()) + return failure(); + + // We only support the case where the source of the expand shape has + // rank greater than or equal to the vector rank. + const int64_t sourceRank = sourceIndices.size(); + const int64_t vectorRank = op.getVectorType().getRank(); + if (sourceRank < vectorRank) + return failure(); + + // We need to construct a new minor identity map since we will have lost + // some dimensions in folding away the expand shape. + auto minorIdMap = AffineMap::getMinorIdentityMap(sourceRank, vectorRank, + op.getContext()); + + rewriter.replaceOpWithNewOp( + op, op.getVectorType(), expandShapeOp.getViewSource(), + sourceIndices, minorIdMap, op.getPadding(), op.getMask(), + op.getInBounds()); + return success(); }) .DefaultUnreachable("unexpected operation"); - return success(); } template @@ -659,6 +686,7 @@ void memref::populateFoldMemRefAliasOpPatterns(RewritePatternSet &patterns) { LoadOpOfExpandShapeOpFolder, LoadOpOfExpandShapeOpFolder, LoadOpOfExpandShapeOpFolder, + LoadOpOfExpandShapeOpFolder, StoreOpOfExpandShapeOpFolder, StoreOpOfExpandShapeOpFolder, StoreOpOfExpandShapeOpFolder, diff --git a/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp b/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp index a26edac98ea8d..2986f4c2d607d 100644 --- a/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp +++ b/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp @@ -106,14 +106,12 @@ ScalableValueBoundsConstraintSet::computeScalableBound( AffineMap bound = [&] { if (boundType == BoundType::EQ && !invalidBound(lowerBound) && - lowerBound[0] == upperBound[0]) { + lowerBound[0] == upperBound[0]) return lowerBound[0]; - } - if (boundType == BoundType::LB && !invalidBound(lowerBound)) { + if (boundType == BoundType::LB && !invalidBound(lowerBound)) 
return lowerBound[0]; - } else if (boundType == BoundType::UB && !invalidBound(upperBound)) { + if (boundType == BoundType::UB && !invalidBound(upperBound)) return upperBound[0]; - } return AffineMap{}; }(); diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir index 106652623933f..ca91b0141f593 100644 --- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir +++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir @@ -992,6 +992,55 @@ func.func @fold_vector_maskedstore_expand_shape( // ----- +func.func @fold_vector_transfer_read_expand_shape( + %arg0 : memref<32xf32>, %arg1 : index) -> vector<8xf32> { + %c0 = arith.constant 0 : index + %pad = ub.poison : f32 + %0 = memref.expand_shape %arg0 [[0, 1]] output_shape [4, 8] : memref<32xf32> into memref<4x8xf32> + %1 = vector.transfer_read %0[%arg1, %c0], %pad {in_bounds = [true]} : memref<4x8xf32>, vector<8xf32> + return %1 : vector<8xf32> +} + +// CHECK-LABEL: func @fold_vector_transfer_read_expand_shape +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<32xf32> +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index +// CHECK: %[[C0:.*]] = arith.constant 0 +// CHECK: %[[PAD:.*]] = ub.poison : f32 +// CHECK: %[[IDX:.*]] = affine.linearize_index [%[[ARG1]], %[[C0]]] by (4, 8) +// CHECK: vector.transfer_read %[[ARG0]][%[[IDX]]], %[[PAD]] {in_bounds = [true]} + +// ----- + +func.func @fold_vector_transfer_read_with_perm_map( + %arg0 : memref<32xf32>, %arg1 : index) -> vector<4x4xf32> { + %c0 = arith.constant 0 : index + %pad = ub.poison : f32 + %0 = memref.expand_shape %arg0 [[0, 1]] output_shape [4, 8] : memref<32xf32> into memref<4x8xf32> + %1 = vector.transfer_read %0[%arg1, %c0], %pad { permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<4x8xf32>, vector<4x4xf32> + return %1 : vector<4x4xf32> +} + +// CHECK-LABEL: func @fold_vector_transfer_read_with_perm_map +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<32xf32> +// CHECK: 
memref.expand_shape %[[ARG0]] {{\[}}[0, 1]] output_shape [4, 8] : memref<32xf32> into memref<4x8xf32> + +// ----- + +func.func @fold_vector_transfer_read_rank_mismatch( + %arg0 : memref<32xf32>, %arg1 : index) -> vector<4x4xf32> { + %c0 = arith.constant 0 : index + %pad = ub.poison : f32 + %0 = memref.expand_shape %arg0 [[0, 1, 2]] output_shape [2, 4, 4] : memref<32xf32> into memref<2x4x4xf32> + %1 = vector.transfer_read %0[%arg1, %c0, %c0], %pad {in_bounds = [true, true]} : memref<2x4x4xf32>, vector<4x4xf32> + return %1 : vector<4x4xf32> +} + +// CHECK-LABEL: func @fold_vector_transfer_read_rank_mismatch +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<32xf32> +// CHECK: memref.expand_shape %[[ARG0]] {{\[}}[0, 1, 2]] output_shape [2, 4, 4] : memref<32xf32> into memref<2x4x4xf32> + +// ----- + func.func @fold_vector_load_collapse_shape( %arg0 : memref<4x8xf32>, %arg1 : index) -> vector<8xf32> { %0 = memref.collapse_shape %arg0 [[0, 1]] : memref<4x8xf32> into memref<32xf32>