diff --git a/.github/workflows/containers/github-action-ci-windows/Dockerfile b/.github/workflows/containers/github-action-ci-windows/Dockerfile index 9ddf5017bc020..f1e4f1538540b 100644 --- a/.github/workflows/containers/github-action-ci-windows/Dockerfile +++ b/.github/workflows/containers/github-action-ci-windows/Dockerfile @@ -98,3 +98,45 @@ RUN powershell -Command \ Add-Type -AssemblyName System.IO.Compression.FileSystem ; \ [System.IO.Compression.ZipFile]::ExtractToDirectory('actions-runner-win.zip', $PWD) ;\ rm actions-runner-win.zip + +# Set the LLVM_VERSION environment variable +ENV LLVM_VERSION=21.1.2 + +# Download and extract Clang compiler. +# Create directories, download, extract, and clean up all in one layer +RUN powershell -Command \ + # --- Setup directories --- \ + Write-Host "Creating directories..."; \ + New-Item -Path "C:\temp-download" -ItemType "Directory" -Force ; \ + New-Item -Path "C:\xz-utils" -ItemType "Directory" -Force ; \ + New-Item -Path "C:\clang" -ItemType "Directory" -Force ; \ + # --- 1. Download and extract xz --- \ + Set-Location C:\temp-download ; \ + Invoke-WebRequest -Uri "http://github.com/tukaani-project/xz/releases/download/v5.8.1/xz-5.8.1-windows.zip" -OutFile "xz.zip"; \ + (Get-FileHash -Path "C:\temp-download\xz.zip" -Algorithm MD5).Hash -eq 'c3c69fdce3e825cc0b76123b36b0bcc2' ; \ + Add-Type -AssemblyName "System.IO.Compression.FileSystem"; \ + [System.IO.Compression.ZipFile]::ExtractToDirectory('C:\temp-download\xz.zip', 'C:\xz-utils'); \ + # --- 2. Download and decompress Clang --- \ + Invoke-WebRequest -Uri "http://github.com/llvm/llvm-project/releases/download/llvmorg-21.1.2/clang+llvm-21.1.2-x86_64-pc-windows-msvc.tar.xz" -OutFile "clang+llvm-21.1.2-x86_64-pc-windows-msvc.tar.xz" ; \ + (Get-FileHash -Path "C:\temp-download\clang+llvm-21.1.2-x86_64-pc-windows-msvc.tar.xz" -Algorithm MD5).Hash -eq '0ae1d3effd9ab9d323f7fa595777f0a2' ; \ + C:\xz-utils\bin_x86-64\xz.exe -d -qq clang+llvm-21.1.2-x86_64-pc-windows-msvc.tar.xz ; \ + # --- 3. Extract clang --- \ + C:\Windows\System32\tar.exe -xf clang+llvm-21.1.2-x86_64-pc-windows-msvc.tar -C C:\clang ; \ + # --- 4. Clean up --- \ + Set-Location C:\ ; \ + Remove-Item C:\temp-download -Recurse -Force; \ + Remove-Item C:\xz-utils -Recurse -Force; \ + # -- 5. Shorten path to clang files & remove unnecessary files -- \ + Set-Location C:\clang ; \ + Rename-Item -Path "C:\clang\clang+llvm-21.1.2-x86_64-pc-windows-msvc" -NewName "C:\clang\clang-msvc" ; \ + Set-Location C:\clang\clang-msvc ; \ + Remove-Item -Path C:\clang\clang-msvc\libexec -Recurse -Force ; \ + Remove-Item -Path C:\clang\clang-msvc\share -Recurse -Force ; \ + Rename-Item -Path "C:\clang\clang-msvc\bin" -NewName "C:\clang\clang-msvc\bin-full" ; \ + New-Item -Path "C:\clang\clang-msvc\bin" -ItemType Directory -Force ; \ + Set-Location C:\clang\clang-msvc\bin ; \ + Copy-Item -Path C:\clang\clang-msvc\bin-full\*.dll -Destination C:\clang\clang-msvc\bin\. ; \ + Copy-Item -Path C:\clang\clang-msvc\bin-full\clang-cl.exe -Destination C:\clang\clang-msvc\bin\. ; \ + Copy-Item -Path C:\clang\clang-msvc\bin-full\lld-link.exe -Destination C:\clang\clang-msvc\bin\. ; \ + Set-Location C:\clang\clang-msvc ; \ + Remove-Item -Path C:\clang\clang-msvc\bin-full -Recurse -Force ; diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp index 22465e664cdd3..90551c22e0734 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp @@ -516,6 +516,18 @@ static void transferNotOkStatusCall(const CallExpr *Expr, State.Env.assume(A.makeNot(OkVal.formula())); } +static void transferEmplaceCall(const CXXMemberCallExpr *Expr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + RecordStorageLocation *StatusOrLoc = + getImplicitObjectLocation(*Expr, State.Env); + if (StatusOrLoc == nullptr) + return; + + auto &OkVal = valForOk(locForStatus(*StatusOrLoc), State.Env); + State.Env.assume(OkVal.formula()); +} + CFGMatchSwitch buildTransferMatchSwitch(ASTContext &Ctx, CFGMatchSwitchBuilder Builder) { @@ -559,6 +571,8 @@ buildTransferMatchSwitch(ASTContext &Ctx, }) .CaseOfCFGStmt(isOkStatusCall(), transferOkStatusCall) .CaseOfCFGStmt(isNotOkStatusCall(), transferNotOkStatusCall) + .CaseOfCFGStmt(isStatusOrMemberCallWithName("emplace"), + transferEmplaceCall) .Build(); } diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp index fff79e52e0aaa..425beb939a42a 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp @@ -2928,6 +2928,53 @@ TEST_P(UncheckedStatusOrAccessModelTest, PointerEqualityCheck) { )cc"); } +TEST_P(UncheckedStatusOrAccessModelTest, Emplace) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + struct Foo { + Foo(int); + }; + + void target(absl::StatusOr sor, int value) { + sor.emplace(value); + sor.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + struct Foo { + Foo(std::initializer_list, int); + }; + + void target(absl::StatusOr sor, int value) { + sor.emplace({1, 2, 3}, value); + sor.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_INT sor; + bool sor_ok = sor.ok(); + if (!sor_ok) + sor.emplace(42); + sor.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target(bool b) { + STATUSOR_INT sor; + if (b) sor.emplace(42); + if (b) sor.value(); + } + )cc"); +} + } // namespace std::string diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index daa012e3eb08b..225a6558ef956 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -451,6 +451,7 @@ struct NodeVisitor { READ_FEATURE(OmpBlockConstruct) READ_FEATURE(OmpClause) READ_FEATURE(OmpClauseList) + READ_FEATURE(OmpCombinerExpression) READ_FEATURE(OmpDefaultClause) READ_FEATURE(OmpDefaultClause::DataSharingAttribute) READ_FEATURE(OmpDefaultmapClause) @@ -496,7 +497,6 @@ struct NodeVisitor { READ_FEATURE(OmpProcBindClause::AffinityPolicy) READ_FEATURE(OmpReductionClause) READ_FEATURE(OmpInReductionClause) - READ_FEATURE(OmpReductionCombiner) READ_FEATURE(OmpInitializerClause) READ_FEATURE(OmpReductionIdentifier) READ_FEATURE(OmpAllocateClause) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 5677277a9b381..af8152deb8a52 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -538,6 +538,7 @@ class ParseTreeDumper { NODE(parser, OmpClauseList) NODE(parser, OmpCloseModifier) NODE_ENUM(OmpCloseModifier, Value) + NODE(parser, OmpCombinerExpression) NODE(parser, OmpContainsClause) NODE(parser, OmpContextSelectorSpecification) NODE(parser, OmpDeclareVariantDirective) @@ -655,7 +656,6 @@ class ParseTreeDumper { NODE_ENUM(OmpProcBindClause, AffinityPolicy) NODE(parser, OmpReductionClause) NODE(OmpReductionClause, Modifier) - NODE(parser, OmpReductionCombiner) NODE(parser, OmpReductionIdentifier) NODE(parser, OmpReductionModifier) NODE_ENUM(OmpReductionModifier, Value) @@ -693,8 +693,8 @@ class ParseTreeDumper { NODE(parser, OmpTraitSetSelectorName) NODE_ENUM(OmpTraitSetSelectorName, Value) NODE(parser, OmpTransparentClause) + NODE(parser, OmpTypeName) NODE(parser, OmpTypeNameList) - NODE(parser, OmpTypeSpecifier) NODE(parser, OmpUnifiedAddressClause) NODE(parser, OmpUnifiedSharedMemoryClause) NODE(parser, OmpUpdateClause) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 6dd4f2492cf22..be64ef3770c60 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3502,6 +3502,16 @@ struct OmpDirectiveName { llvm::omp::Directive v{llvm::omp::Directive::OMPD_unknown}; }; +// type-name list item +struct OmpTypeName { + UNION_CLASS_BOILERPLATE(OmpTypeName); + std::variant u; +}; + +struct OmpTypeNameList { + WRAPPER_CLASS_BOILERPLATE(OmpTypeNameList, std::list); +}; + // 2.1 Directives or clauses may accept a list or extended-list. // A list item is a variable, array section or common block name (enclosed // in slashes). An extended list item is a list item or a procedure Name. @@ -3539,21 +3549,12 @@ struct OmpReductionIdentifier { // combiner-expression -> // since 4.5 // assignment-statement | // function-reference -struct OmpReductionCombiner { - UNION_CLASS_BOILERPLATE(OmpReductionCombiner); +struct OmpCombinerExpression { + UNION_CLASS_BOILERPLATE(OmpCombinerExpression); std::variant u; }; inline namespace arguments { -struct OmpTypeSpecifier { - UNION_CLASS_BOILERPLATE(OmpTypeSpecifier); - std::variant u; -}; - -struct OmpTypeNameList { - WRAPPER_CLASS_BOILERPLATE(OmpTypeNameList, std::list); -}; - struct OmpLocator { UNION_CLASS_BOILERPLATE(OmpLocator); std::variant u; @@ -3596,7 +3597,7 @@ struct OmpMapperSpecifier { struct OmpReductionSpecifier { TUPLE_CLASS_BOILERPLATE(OmpReductionSpecifier); std::tuple> + std::optional> t; }; diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h index 7539d12264435..032944d8be370 100644 --- a/flang/include/flang/Semantics/openmp-utils.h +++ b/flang/include/flang/Semantics/openmp-utils.h @@ -28,6 +28,7 @@ #include namespace Fortran::semantics { +class Scope; class SemanticsContext; class Symbol; diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 6b02fefb92196..39bac818fe5d0 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -3106,7 +3106,9 @@ IntrinsicLibrary::genAtomicCas(mlir::Type resultType, .getResult(0); auto cmpxchg = mlir::LLVM::AtomicCmpXchgOp::create( builder, loc, address, arg1, arg2, successOrdering, failureOrdering); - return mlir::LLVM::ExtractValueOp::create(builder, loc, cmpxchg, 1); + mlir::Value boolResult = + mlir::LLVM::ExtractValueOp::create(builder, loc, cmpxchg, 1); + return builder.createConvert(loc, resultType, boolResult); } mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index c0472ad3c0692..d1e081cfd1b41 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -367,8 +367,8 @@ struct OmpArgumentListParser { }; TYPE_PARSER( // - construct(Parser{}) || - construct(Parser{})) + construct(Parser{}) || + construct(Parser{})) // 2.15.3.6 REDUCTION (reduction-identifier: variable-name-list) TYPE_PARSER(construct(Parser{}) || @@ -376,8 +376,8 @@ TYPE_PARSER(construct(Parser{}) || TYPE_PARSER(construct( // Parser{}, - ":"_tok >> nonemptyList(Parser{}), - maybe(":"_tok >> Parser{}))) + ":"_tok >> nonemptyList(Parser{}), + maybe(":"_tok >> Parser{}))) // --- Parsers for context traits ------------------------------------- @@ -1832,8 +1832,8 @@ TYPE_PARSER(sourced(construct( IsDirective(llvm::omp::Directive::OMPD_declare_mapper)) >= Parser{}))) -TYPE_PARSER(construct(Parser{}) || - construct(Parser{})) +TYPE_PARSER(construct(Parser{}) || + construct(Parser{})) TYPE_PARSER(sourced(construct( OmpBlockConstructParser{llvm::omp::Directive::OMPD_critical}))) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index b172e429c84e8..2f86c76c5fe03 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2111,7 +2111,7 @@ class UnparseVisitor { Walk(std::get(x.t)); Put(":"); Walk(std::get(x.t)); - Walk(": ", std::get>(x.t)); + Walk(": ", std::get>(x.t)); } void Unparse(const llvm::omp::Directive &x) { unsigned ompVersion{langOpts_.OpenMPVersion}; @@ -2519,7 +2519,7 @@ class UnparseVisitor { Walk(x.u); } } - void Unparse(const OmpReductionCombiner &x) { + void Unparse(const OmpCombinerExpression &x) { // Don't let the visitor go to the normal AssignmentStmt Unparse function, // it adds an extra newline that we don't want. if (const auto *assignment{std::get_if(&x.u)}) { diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index cc55bb4954cc3..6b304b62ef867 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -26,7 +26,9 @@ #include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/expression.h" +#include "flang/Semantics/scope.h" #include "flang/Semantics/semantics.h" +#include "flang/Semantics/symbol.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -505,5 +507,4 @@ bool IsStrictlyStructuredBlock(const parser::Block &block) { return false; } } - } // namespace Fortran::semantics::omp diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index db75437708a6c..4af6cf6a91239 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1772,11 +1772,11 @@ class OmpVisitor : public virtual DeclarationVisitor { messageHandler().set_currStmtSource(std::nullopt); } - bool Pre(const parser::OmpTypeSpecifier &x) { + bool Pre(const parser::OmpTypeName &x) { BeginDeclTypeSpec(); return true; } - void Post(const parser::OmpTypeSpecifier &x) { // + void Post(const parser::OmpTypeName &x) { // EndDeclTypeSpec(); } @@ -2007,7 +2007,7 @@ void OmpVisitor::ProcessReductionSpecifier( } } EndDeclTypeSpec(); - Walk(std::get>(spec.t)); + Walk(std::get>(spec.t)); Walk(clauses); PopScope(); } diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 7d6caf58d71b3..5c4c3c6d39820 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -479,3 +479,16 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_bulk_s2g ! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> + +attributes(device) subroutine testAtomicCasLoop(aa, n) + integer :: a + do while (atomiccas(a, 0, 1) == 1) + end do +end subroutine + +! CHECK-LABEL: func.func @_QPtestatomiccasloop +! CHECK: %[[CMP_XCHG:.*]] = llvm.cmpxchg %15, %c0_i32, %c1_i32 acq_rel monotonic : !llvm.ptr, i32 +! CHECK: %[[CMP_XCHG_EV:.*]] = llvm.extractvalue %[[CMP_XCHG]][1] : !llvm.struct<(i32, i1)> +! CHECK: %[[CASTED_CMP_XCHG_EV:.*]] = fir.convert %[[CMP_XCHG_EV]] : (i1) -> i32 +! CHECK: %{{.*}} = arith.constant 1 : i32 +! CHECK: %19 = arith.cmpi eq, %[[CASTED_CMP_XCHG_EV]], %{{.*}} : i32 diff --git a/flang/test/Parser/OpenMP/declare-reduction-multi.f90 b/flang/test/Parser/OpenMP/declare-reduction-multi.f90 index 0af3ed6e78571..a682958eb9128 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-multi.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-multi.f90 @@ -32,9 +32,9 @@ program omp_examples !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out%r=omp_out%r+omp_in%r' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=omp_out%r+omp_in%r' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=0._4' !$omp declare reduction(*:tt:omp_out%r = omp_out%r * omp_in%r) initializer(omp_priv%r = 1) @@ -44,9 +44,9 @@ program omp_examples !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Multiply -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out%r=omp_out%r*omp_in%r' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=omp_out%r*omp_in%r' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=1._4' !$omp declare reduction(max:tt:omp_out = mymax(omp_out, omp_in)) initializer(omp_priv%r = 0) @@ -56,9 +56,9 @@ program omp_examples !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'max' -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out=mymax(omp_out,omp_in)' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=mymax(omp_out,omp_in)' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=0._4' !$omp declare reduction(min:tt:omp_out%r = min(omp_out%r, omp_in%r)) initializer(omp_priv%r = 1) @@ -68,9 +68,9 @@ program omp_examples !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'min' -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out%r=min(omp_out%r,omp_in%r)' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=min(omp_out%r,omp_in%r)' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=1._4' call random_number(values%r) diff --git a/flang/test/Parser/OpenMP/declare-reduction-operator.f90 b/flang/test/Parser/OpenMP/declare-reduction-operator.f90 index 347588468617b..e4d07c8265b1e 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-operator.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-operator.f90 @@ -22,9 +22,9 @@ subroutine reduce_1 ( n, tts ) !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out=tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=tt(x=0_4,y=0_4)' !$omp declare reduction(+ : tt : omp_out = tt(omp_out%x - omp_in%x , omp_out%y - omp_in%y)) initializer(omp_priv = tt(0,0)) @@ -36,9 +36,9 @@ subroutine reduce_1 ( n, tts ) !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt2' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out=tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=tt2(x=0._8,y=0._8)' !$omp declare reduction(+ :tt2 : omp_out = tt2(omp_out%x - omp_in%x , omp_out%y - omp_in%y)) initializer(omp_priv = tt2(0,0)) diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 index 7514f0cf83877..73d7ccf489f01 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 @@ -25,9 +25,9 @@ end subroutine initme !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'red_add' -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> KindSelector -> Scalar -> Integer -> Constant -> Expr = '4_4' +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> KindSelector -> Scalar -> Integer -> Constant -> Expr = '4_4' !PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '4' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out=omp_out+omp_in' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=omp_out+omp_in' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerProc !PARSE-TREE: | | ProcedureDesignator -> Name = 'initme' @@ -73,6 +73,6 @@ end program main !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'my_add_red' -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out=omp_out+omp_in' +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=omp_out+omp_in' !PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=0_4' diff --git a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 b/flang/test/Parser/OpenMP/metadirective-dirspec.f90 index baf969317c18f..c373001be8963 100644 --- a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 +++ b/flang/test/Parser/OpenMP/metadirective-dirspec.f90 @@ -123,11 +123,11 @@ subroutine f03 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add -!PARSE-TREE: | | | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | | | Name = 'tt1' -!PARSE-TREE: | | | | OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | | | OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | | | Name = 'tt2' -!PARSE-TREE: | | | | OmpReductionCombiner -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x' +!PARSE-TREE: | | | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x' !PARSE-TREE: | | | | | | Designator -> DataRef -> StructureComponent !PARSE-TREE: | | | | | | | DataRef -> Name = 'omp_out' !PARSE-TREE: | | | | | | | Name = 'x' diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 index f4cdd556bd4e5..39e8f059bbb24 100644 --- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 +++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 @@ -86,9 +86,9 @@ subroutine f02 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add -!PARSE-TREE: | | OmpTypeNameList -> OmpTypeSpecifier -> TypeSpec -> DerivedTypeSpec +!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 't' -!PARSE-TREE: | | OmpReductionCombiner -> AssignmentStmt = 'omp_out%x=omp_out%x+omp_in%x' +!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%x=omp_out%x+omp_in%x' !PARSE-TREE: | | | Variable = 'omp_out%x' !PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent !PARSE-TREE: | | | | | DataRef -> Name = 'omp_out' diff --git a/lld/test/wasm/lto/relocation-model.ll b/lld/test/wasm/lto/relocation-model.ll new file mode 100644 index 0000000000000..8fe198d0c64e6 --- /dev/null +++ b/lld/test/wasm/lto/relocation-model.ll @@ -0,0 +1,21 @@ +;; The explicit relocation model flag. + +; RUN: llvm-as %s -o %t.o + +; RUN: wasm-ld %t.o -o %t.wasm -save-temps -r -mllvm -relocation-model=pic +; RUN: llvm-readobj -r %t.wasm.lto.o | FileCheck %s --check-prefix=PIC + +; RUN: wasm-ld %t.o -o %t_static.wasm -save-temps -r -mllvm -relocation-model=static +; RUN: llvm-readobj -r %t_static.wasm.lto.o | FileCheck %s --check-prefix=STATIC + +; PIC: R_WASM_GLOBAL_INDEX_LEB foo +; STATIC: R_WASM_MEMORY_ADDR_LEB foo + +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" +target triple = "wasm32-unknown-unknown" + +@foo = external global i32 +define i32 @_start() { + %t = load i32, i32* @foo + ret i32 %t +} diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp index 71f18aa25a35c..ae85f4693214b 100644 --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -57,7 +57,9 @@ static lto::Config createConfig() { c.DebugPassManager = ctx.arg.ltoDebugPassManager; c.AlwaysEmitRegularLTOObj = !ctx.arg.ltoObjPath.empty(); - if (ctx.arg.relocatable) + if (auto relocModel = getRelocModelFromCMModel()) + c.RelocModel = *relocModel; + else if (ctx.arg.relocatable) c.RelocModel = std::nullopt; else if (ctx.isPic) c.RelocModel = Reloc::PIC_; diff --git a/lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test b/lldb/test/Shell/lldb-server/TestErrorMessages.test similarity index 100% rename from lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test rename to lldb/test/Shell/lldb-server/TestErrorMessages.test diff --git a/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test deleted file mode 100644 index 7d3b37aa5fc39..0000000000000 --- a/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test +++ /dev/null @@ -1,25 +0,0 @@ -RUN: %platformserver 2>&1 | FileCheck --check-prefixes=NO_LISTEN,ALL %s -NO_LISTEN: error: either --listen or --child-platform-fd is required - -RUN: %lldb-server platform --listen 2>&1 | FileCheck --check-prefixes=LISTEN_MISSING,ALL %s -LISTEN_MISSING: error: --listen: missing argument - -RUN: %lldb-server p --bogus 2>&1 | FileCheck --check-prefixes=BOGUS,ALL %s -BOGUS: error: unknown argument '--bogus' - -RUN: %platformserver --gdbserver-port 2>&1 | FileCheck --check-prefixes=GDBPORT_MISSING,ALL %s -GDBPORT_MISSING: error: --gdbserver-port: missing argument - -RUN: %platformserver --gdbserver-port notanumber --listen :1234 2>&1 | FileCheck --check-prefixes=GDBPORT_INVALID %s -GDBPORT_INVALID: error: invalid --gdbserver-port value - -RUN: %platformserver --socket-file 2>&1 | FileCheck --check-prefixes=SOCKETFILE_MISSING,ALL %s -SOCKETFILE_MISSING: error: --socket-file: missing argument - -RUN: %platformserver --log-file 2>&1 | FileCheck --check-prefixes=LOGFILE_MISSING,ALL %s -LOGFILE_MISSING: error: --log-file: missing argument - -RUN: %platformserver --log-channels 2>&1 | FileCheck --check-prefixes=LOGCHANNELS_MISSING,ALL %s -LOGCHANNELS_MISSING: error: --log-channels: missing argument - -ALL: Use 'lldb-server{{(\.exe)?}} {{p|platform}} --help' for a complete list of options. diff --git a/lldb/test/Shell/lldb-server/TestPlatformHelp.test b/lldb/test/Shell/lldb-server/TestPlatformHelp.test deleted file mode 100644 index c5ced8a318100..0000000000000 --- a/lldb/test/Shell/lldb-server/TestPlatformHelp.test +++ /dev/null @@ -1,40 +0,0 @@ -RUN: %platformserver --help 2>&1 | FileCheck %s -RUN: %platformserver -h 2>&1 | FileCheck %s -RUN: %lldb-server p --help 2>&1 | FileCheck %s -RUN: %lldb-server p -h 2>&1 | FileCheck %s -RUN: %lldb-server platform --help 2>&1 | FileCheck %s -RUN: %lldb-server platform -h 2>&1 | FileCheck %s - -CHECK: OVERVIEW: lldb-server{{(\.exe)?}} platform - -CHECK: USAGE: lldb-server{{(\.exe)?}} {{p|platform}} [options] --listen <[host]:port> {{\[}}[--] program args...] - -CHECK: CONNECTION OPTIONS: -CHECK: --gdbserver-port -CHECK-SAME: Short form: -P -CHECK: --listen <[host]:port> -CHECK-SAME: Short form: -L -CHECK: --socket-file -CHECK-SAME: Short form: -f - -CHECK: GENERAL OPTIONS: -CHECK: --help -CHECK: --log-channels -CHECK: Short form: -c -CHECK: --log-file -CHECK-SAME: Short form: -l -CHECK: --server - -CHECK: OPTIONS: -CHECK: -- program args - -CHECK: DESCRIPTION -CHECK: Acts as a platform server for remote debugging - -CHECK: EXAMPLES -CHECK: # Listen on port 1234, exit after first connection -CHECK: lldb-server{{(\.exe)?}} platform --listen tcp://0.0.0.0:1234 -CHECK: # Listen on port 5555, accept multiple connections -CHECK: lldb-server{{(\.exe)?}} platform --server --listen tcp://localhost:5555 -CHECK: # Listen on Unix domain socket -CHECK: lldb-server{{(\.exe)?}} platform --listen unix:///tmp/lldb-server.sock diff --git a/lldb/test/Shell/lldb-server/TestPlatformSuccessfulStartup.test b/lldb/test/Shell/lldb-server/TestPlatformSuccessfulStartup.test deleted file mode 100644 index 88a2384c31c34..0000000000000 --- a/lldb/test/Shell/lldb-server/TestPlatformSuccessfulStartup.test +++ /dev/null @@ -1,35 +0,0 @@ -# Test successful startup with valid TCP listen address -# The socket file is created immediately when the server is ready to accept connections, -# so we can verify successful startup without arbitrary sleep delays. -RUN: rm -f %t.socket1 -RUN: timeout 0.2s %platformserver --listen tcp://127.0.0.1:0 --socket-file %t.socket1 > %t.out1 2>&1 || true -RUN: test -f %t.socket1 -RUN: FileCheck --allow-empty --check-prefix=NO-ERROR %s < %t.out1 - -# Test successful startup with valid gdbserver-port -RUN: rm -f %t.socket3 -RUN: timeout 0.2s %platformserver --listen tcp://127.0.0.1:0 --gdbserver-port 0 --socket-file %t.socket3 > %t.out3 2>&1 || true -RUN: test -f %t.socket3 -RUN: FileCheck --allow-empty --check-prefix=NO-ERROR %s < %t.out3 - -# Test successful startup with specific valid gdbserver-port number -RUN: rm -f %t.socket4 -RUN: timeout 0.2s %platformserver --listen tcp://127.0.0.1:0 --gdbserver-port 12345 --socket-file %t.socket4 > %t.out4 2>&1 || true -RUN: test -f %t.socket4 -RUN: FileCheck --allow-empty --check-prefix=NO-ERROR %s < %t.out4 - -# Test successful startup with server mode (accepting multiple connections) -RUN: rm -f %t.socket5 -RUN: timeout 0.2s %platformserver --server --listen tcp://127.0.0.1:0 --socket-file %t.socket5 > %t.out5 2>&1 || true -RUN: test -f %t.socket5 -RUN: FileCheck --allow-empty --check-prefix=NO-ERROR %s < %t.out5 - -# Test successful startup with abbreviated 'p' command -RUN: rm -f %t.socket6 -RUN: timeout 0.2s %lldb-server p --listen tcp://127.0.0.1:0 --socket-file %t.socket6 > %t.out6 2>&1 || true -RUN: test -f %t.socket6 -RUN: FileCheck --allow-empty --check-prefix=NO-ERROR %s < %t.out6 - -# Verify no error or warning messages appear in successful startup -NO-ERROR-NOT: error: -NO-ERROR-NOT: warning: diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/sme_thread_status.h b/lldb/tools/debugserver/source/MacOSX/arm64/sme_thread_status.h index f33b3202ccab5..f258c5924867f 100644 --- a/lldb/tools/debugserver/source/MacOSX/arm64/sme_thread_status.h +++ b/lldb/tools/debugserver/source/MacOSX/arm64/sme_thread_status.h @@ -46,7 +46,7 @@ __attribute__((aligned(alignof(unsigned int)))); #define ARM_SME_ZA_STATE9 40 #define ARM_SME_ZA_STATE10 41 #define ARM_SME_ZA_STATE11 42 -#define ARM_SME_ZA_STATE12 42 +#define ARM_SME_ZA_STATE12 43 #define ARM_SME_ZA_STATE13 44 #define ARM_SME_ZA_STATE14 45 #define ARM_SME_ZA_STATE15 46 diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt index fb55c64936121..1d8dc72a3f872 100644 --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS LLGSOptions.td) tablegen(LLVM LLGSOptions.inc -gen-opt-parser-defs) add_public_tablegen_target(LLGSOptionsTableGen) -set(LLVM_TARGET_DEFINITIONS PlatformOptions.td) -tablegen(LLVM PlatformOptions.inc -gen-opt-parser-defs) -add_public_tablegen_target(PlatformOptionsTableGen) - set(LLDB_PLUGINS) if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") @@ -71,7 +67,6 @@ add_lldb_tool(lldb-server add_dependencies(lldb-server LLGSOptionsTableGen - PlatformOptionsTableGen ${tablegen_deps} ) target_include_directories(lldb-server PRIVATE "${LLDB_SOURCE_DIR}/source") diff --git a/lldb/tools/lldb-server/PlatformOptions.td b/lldb/tools/lldb-server/PlatformOptions.td deleted file mode 100644 index eedd1d8c35343..0000000000000 --- a/lldb/tools/lldb-server/PlatformOptions.td +++ /dev/null @@ -1,75 +0,0 @@ -include "llvm/Option/OptParser.td" - -class F: Flag<["--", "-"], name>; -class R prefixes, string name> - : Option; - -multiclass SJ { - def NAME: Separate<["--", "-"], name>, - HelpText; - def NAME # _eq: Joined<["--", "-"], name # "=">, - Alias(NAME)>; -} - -def grp_connect : OptionGroup<"connection">, HelpText<"CONNECTION OPTIONS">; - -defm listen: SJ<"listen", "Host and port to listen on. Format: [host]:port or protocol://[host]:port (e.g., tcp://localhost:1234, unix:///path/to/socket). Short form: -L">, - MetaVarName<"<[host]:port>">, - Group; -def: Separate<["-"], "L">, Alias, - Group; - -defm socket_file: SJ<"socket-file", "Write listening socket information (port number for TCP or path for Unix domain sockets) to the specified file. Short form: -f">, - MetaVarName<"">, - Group; -def: Separate<["-"], "f">, Alias, - Group; - -defm gdbserver_port: SJ<"gdbserver-port", "Port to use for spawned gdbserver instances. If 0 or unspecified, a port will be chosen automatically. Short form: -P">, - MetaVarName<"">, - Group; -def: Separate<["-"], "P">, Alias, - Group; - -defm child_platform_fd: SJ<"child-platform-fd", "File descriptor for communication with parent platform process (internal use only).">, - MetaVarName<"">, - Group, - Flags<[HelpHidden]>; - -def grp_general : OptionGroup<"general options">, HelpText<"GENERAL OPTIONS">; - -def server: F<"server">, - HelpText<"Run in server mode, accepting multiple client connections sequentially. Without this flag, the server exits after handling the first connection.">, - Group; - -defm log_channels: SJ<"log-channels", "Channels to log. A colon-separated list of entries. Each entry starts with a channel followed by a space-separated list of categories. Common channels: lldb, gdb-remote, platform, process. Short form: -c">, - MetaVarName<"">, - Group; -def: Separate<["-"], "c">, Alias, - Group; - -defm log_file: SJ<"log-file", "Destination file to log to. If empty, log to stderr. Short form: -l">, - MetaVarName<"">, - Group; -def: Separate<["-"], "l">, Alias, - Group; - -def debug: F<"debug">, - HelpText<"(Unused, kept for backward compatibility)">, - Group, - Flags<[HelpHidden]>; - -def verbose: F<"verbose">, - HelpText<"(Unused, kept for backward compatibility)">, - Group, - Flags<[HelpHidden]>; - -def help: F<"help">, - HelpText<"Display this help message and exit.">, - Group; -def: Flag<["-"], "h">, Alias, - Group; - -def REM : R<["--"], "">, - HelpText<"Arguments to pass to launched gdbserver instances.">, - MetaVarName<"program args">; diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp index 59b1eb419bc2b..0bd928507ba89 100644 --- a/lldb/tools/lldb-server/lldb-platform.cpp +++ b/lldb/tools/lldb-server/lldb-platform.cpp @@ -21,9 +21,6 @@ #include #include -#include "llvm/Option/ArgList.h" -#include "llvm/Option/OptTable.h" -#include "llvm/Option/Option.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/WithColor.h" @@ -59,69 +56,22 @@ using namespace llvm; // of target CPUs. For now, let's just use 100. static const int backlog = 100; static const int socket_error = -1; - -namespace { -using namespace llvm::opt; - -enum ID { - OPT_INVALID = 0, // This is not an option ID. -#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), -#include "PlatformOptions.inc" -#undef OPTION -}; - -#define OPTTABLE_STR_TABLE_CODE -#include "PlatformOptions.inc" -#undef OPTTABLE_STR_TABLE_CODE - -#define OPTTABLE_PREFIXES_TABLE_CODE -#include "PlatformOptions.inc" -#undef OPTTABLE_PREFIXES_TABLE_CODE - -static constexpr opt::OptTable::Info InfoTable[] = { -#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), -#include "PlatformOptions.inc" -#undef OPTION -}; - -class PlatformOptTable : public opt::GenericOptTable { -public: - PlatformOptTable() - : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} - - void PrintHelp(llvm::StringRef Name) { - std::string Usage = - (Name + " [options] --listen <[host]:port> [[--] program args...]") - .str(); - - std::string Title = "lldb-server platform"; - - OptTable::printHelp(llvm::outs(), Usage.c_str(), Title.c_str()); - - llvm::outs() << R"( -DESCRIPTION - Acts as a platform server for remote debugging. When LLDB clients connect, - the platform server handles platform operations (file transfers, process - launching) and spawns debug server instances (lldb-server gdbserver) to - handle actual debugging sessions. - - By default, the server exits after handling one connection. Use --server - to keep running and accept multiple connections sequentially. - -EXAMPLES - # Listen on port 1234, exit after first connection - lldb-server platform --listen tcp://0.0.0.0:1234 - - # Listen on port 5555, accept multiple connections - lldb-server platform --server --listen tcp://localhost:5555 - - # Listen on Unix domain socket - lldb-server platform --listen unix:///tmp/lldb-server.sock - -)"; - } -}; -} // namespace +static int g_debug = 0; +static int g_verbose = 0; +static int g_server = 0; + +// option descriptors for getopt_long_only() +static struct option g_long_options[] = { + {"debug", no_argument, &g_debug, 1}, + {"verbose", no_argument, &g_verbose, 1}, + {"log-file", required_argument, nullptr, 'l'}, + {"log-channels", required_argument, nullptr, 'c'}, + {"listen", required_argument, nullptr, 'L'}, + {"gdbserver-port", required_argument, nullptr, 'P'}, + {"socket-file", required_argument, nullptr, 'f'}, + {"server", no_argument, &g_server, 1}, + {"child-platform-fd", required_argument, nullptr, 2}, + {nullptr, 0, nullptr, 0}}; #if defined(__APPLE__) #define LOW_PORT (IPPORT_RESERVED) @@ -147,11 +97,12 @@ static void signal_handler(int signo) { } #endif -static void display_usage(PlatformOptTable &Opts, const char *progname, - const char *subcommand) { - std::string Name = - (llvm::sys::path::filename(progname) + " " + subcommand).str(); - Opts.PrintHelp(Name); +static void display_usage(const char *progname, const char *subcommand) { + fprintf(stderr, "Usage:\n %s %s [--log-file log-file-name] [--log-channels " + "log-channel-list] [--port-file port-file-path] --server " + "--listen port\n", + progname, subcommand); + exit(0); } static Status parse_listen_host_port(Socket::SocketProtocol &protocol, @@ -310,8 +261,7 @@ static Status spawn_process(const char *progname, const FileSpec &prog, const Socket *conn_socket, uint16_t gdb_port, const lldb_private::Args &args, const std::string &log_file, - const StringRef log_channels, MainLoop &main_loop, - bool multi_client) { + const StringRef log_channels, MainLoop &main_loop) { Status error; SharedSocket shared_socket(conn_socket, error); if (error.Fail()) @@ -347,12 +297,9 @@ static Status spawn_process(const char *progname, const FileSpec &prog, launch_info.SetLaunchInSeparateProcessGroup(false); - // Set up process monitor callback based on whether we're in server mode. - if (multi_client) - // In server mode: empty callback (don't terminate when child exits). + if (g_server) launch_info.SetMonitorProcessCallback([](lldb::pid_t, int, int) {}); else - // In single-client mode: terminate main loop when child exits. launch_info.SetMonitorProcessCallback([&main_loop](lldb::pid_t, int, int) { main_loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); @@ -424,101 +371,107 @@ int main_platform(int argc, char *argv[]) { signal(SIGPIPE, SIG_IGN); signal(SIGHUP, signal_handler); #endif + int long_option_index = 0; + Status error; + std::string listen_host_port; + int ch; - // Special handling for 'help' as first argument. - if (argc > 0 && strcmp(argv[0], "help") == 0) { - PlatformOptTable Opts; - display_usage(Opts, progname, subcommand); - return EXIT_SUCCESS; - } + std::string log_file; + StringRef + log_channels; // e.g. "lldb process threads:gdb-remote default:linux all" - Status error; shared_fd_t fd = SharedSocket::kInvalidFD; + uint16_t gdbserver_port = 0; + FileSpec socket_file; + bool show_usage = false; + int option_error = 0; - PlatformOptTable Opts; - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - bool HasError = false; + std::string short_options(OptionParser::GetShortOptionString(g_long_options)); - opt::InputArgList Args = - Opts.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](llvm::StringRef Msg) { - WithColor::error() << Msg << "\n"; - HasError = true; - }); +#if __GLIBC__ + optind = 0; +#else + optreset = 1; + optind = 1; +#endif - std::string Name = - (llvm::sys::path::filename(progname) + " " + subcommand).str(); - std::string HelpText = - "Use '" + Name + " --help' for a complete list of options.\n"; + while ((ch = getopt_long_only(argc, argv, short_options.c_str(), + g_long_options, &long_option_index)) != -1) { + switch (ch) { + case 0: // Any optional that auto set themselves will return 0 + break; - if (HasError) { - llvm::errs() << HelpText; - return EXIT_FAILURE; - } + case 'L': + listen_host_port.append(optarg); + break; - if (Args.hasArg(OPT_help)) { - display_usage(Opts, progname, subcommand); - return EXIT_SUCCESS; - } + case 'l': // Set Log File + if (optarg && optarg[0]) + log_file.assign(optarg); + break; - // Parse arguments. - std::string listen_host_port = Args.getLastArgValue(OPT_listen).str(); - std::string log_file = Args.getLastArgValue(OPT_log_file).str(); - StringRef log_channels = Args.getLastArgValue(OPT_log_channels); - bool multi_client = Args.hasArg(OPT_server); - [[maybe_unused]] bool debug = Args.hasArg(OPT_debug); - [[maybe_unused]] bool verbose = Args.hasArg(OPT_verbose); - - if (Args.hasArg(OPT_socket_file)) { - socket_file.SetFile(Args.getLastArgValue(OPT_socket_file), - FileSpec::Style::native); - } + case 'c': // Log Channels + if (optarg && optarg[0]) + log_channels = StringRef(optarg); + break; - if (Args.hasArg(OPT_gdbserver_port)) { - if (!llvm::to_integer(Args.getLastArgValue(OPT_gdbserver_port), - gdbserver_port)) { - WithColor::error() << "invalid --gdbserver-port value\n"; - return EXIT_FAILURE; - } - } + case 'f': // Socket file + if (optarg && optarg[0]) + socket_file.SetFile(optarg, FileSpec::Style::native); + break; - if (Args.hasArg(OPT_child_platform_fd)) { - uint64_t _fd; - if (!llvm::to_integer(Args.getLastArgValue(OPT_child_platform_fd), _fd)) { - WithColor::error() << "invalid --child-platform-fd value\n"; - return EXIT_FAILURE; + case 'P': + case 'm': + case 'M': { + uint16_t portnum; + if (!llvm::to_integer(optarg, portnum)) { + WithColor::error() << "invalid port number string " << optarg << "\n"; + option_error = 2; + break; + } + // Note the condition gdbserver_port > HIGH_PORT is valid in case of using + // --child-platform-fd. Check gdbserver_port later. + if (ch == 'P') + gdbserver_port = portnum; + else if (gdbserver_port == 0) + gdbserver_port = portnum; + } break; + + case 2: { + uint64_t _fd; + if (!llvm::to_integer(optarg, _fd)) { + WithColor::error() << "invalid fd " << optarg << "\n"; + option_error = 6; + } else + fd = (shared_fd_t)_fd; + } break; + + case 'h': /* fall-through is intentional */ + case '?': + show_usage = true; + break; } - fd = (shared_fd_t)_fd; } if (!LLDBServerUtilities::SetupLogging(log_file, log_channels, 0)) return -1; // Print usage and exit if no listening port is specified. - if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) { - WithColor::error() << "either --listen or --child-platform-fd is required\n" - << HelpText; - return EXIT_FAILURE; - } + if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) + show_usage = true; - // Get remaining arguments for inferior. - std::vector Inputs; - for (opt::Arg *Arg : Args.filtered(OPT_INPUT)) - Inputs.push_back(Arg->getValue()); - if (opt::Arg *Arg = Args.getLastArg(OPT_REM)) { - for (const char *Val : Arg->getValues()) - Inputs.push_back(Val); + if (show_usage || option_error) { + display_usage(progname, subcommand); + exit(option_error); } + // Skip any options we consumed with getopt_long_only. + argc -= optind; + argv += optind; lldb_private::Args inferior_arguments; - if (!Inputs.empty()) { - std::vector args_ptrs; - for (const auto &Input : Inputs) - args_ptrs.push_back(Input.data()); - inferior_arguments.SetArguments(args_ptrs.size(), args_ptrs.data()); - } + inferior_arguments.SetArguments(argc, const_cast(argv)); FileSpec debugserver_path = GetDebugserverPath(); if (!debugserver_path) { @@ -561,7 +514,7 @@ int main_platform(int argc, char *argv[]) { platform.SetConnection( std::make_unique(std::move(socket))); client_handle(platform, inferior_arguments); - return EXIT_SUCCESS; + return 0; } if (gdbserver_port != 0 && @@ -569,7 +522,7 @@ int main_platform(int argc, char *argv[]) { WithColor::error() << llvm::formatv("Port number {0} is not in the " "valid user port range of {1} - {2}\n", gdbserver_port, LOW_PORT, HIGH_PORT); - return EXIT_FAILURE; + return 1; } Socket::SocketProtocol protocol = Socket::ProtocolUnixDomain; @@ -606,7 +559,7 @@ int main_platform(int argc, char *argv[]) { if (error.Fail()) { fprintf(stderr, "failed to write socket id to %s: %s\n", socket_file.GetPath().c_str(), error.AsCString()); - return EXIT_FAILURE; + return 1; } } @@ -624,22 +577,22 @@ int main_platform(int argc, char *argv[]) { llvm::Expected> platform_handles = platform_sock->Accept( main_loop, [progname, gdbserver_port, &inferior_arguments, log_file, - log_channels, &main_loop, multi_client, + log_channels, &main_loop, &platform_handles](std::unique_ptr sock_up) { printf("Connection established.\n"); Status error = spawn_process( progname, HostInfo::GetProgramFileSpec(), sock_up.get(), gdbserver_port, inferior_arguments, log_file, log_channels, - main_loop, multi_client); + main_loop); if (error.Fail()) { Log *log = GetLog(LLDBLog::Platform); LLDB_LOGF(log, "spawn_process failed: %s", error.AsCString()); WithColor::error() << "spawn_process failed: " << error.AsCString() << "\n"; - if (!multi_client) + if (!g_server) main_loop.RequestTermination(); } - if (!multi_client) + if (!g_server) platform_handles->clear(); }); if (!platform_handles) { @@ -663,5 +616,5 @@ int main_platform(int argc, char *argv[]) { fprintf(stderr, "lldb-server exiting...\n"); - return EXIT_SUCCESS; + return 0; } diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index ca0b918f56c46..51109d1f416f2 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -14,6 +14,7 @@ #ifndef LLVM_ADT_SMALLVECTOR_H #define LLVM_ADT_SMALLVECTOR_H +#include "llvm/ADT/ADL.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Compiler.h" #include @@ -1295,28 +1296,27 @@ inline size_t capacity_in_bytes(const SmallVector &X) { template using ValueTypeFromRangeType = - std::remove_const_t()))>>; + std::remove_const_t>; /// Given a range of type R, iterate the entire range and return a /// SmallVector with elements of the vector. This is useful, for example, /// when you want to iterate a range and then sort the results. template SmallVector, Size> to_vector(R &&Range) { - return {std::begin(Range), std::end(Range)}; + return {adl_begin(Range), adl_end(Range)}; } template SmallVector> to_vector(R &&Range) { - return {std::begin(Range), std::end(Range)}; + return {adl_begin(Range), adl_end(Range)}; } template SmallVector to_vector_of(R &&Range) { - return {std::begin(Range), std::end(Range)}; + return {adl_begin(Range), adl_end(Range)}; } template SmallVector to_vector_of(R &&Range) { - return {std::begin(Range), std::end(Range)}; + return {adl_begin(Range), adl_end(Range)}; } // Explicit instantiations diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e8b053d015917..73f5e9feeefd3 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1493,24 +1493,23 @@ def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>; // def int_var_annotation : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyptr_ty, LLVMMatchType<1>, llvm_i32_ty, LLVMMatchType<1>], - [IntrInaccessibleMemOnly], "llvm.var.annotation">; + [IntrInaccessibleMemOnly]>; def int_ptr_annotation : DefaultAttrsIntrinsic< [llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyptr_ty, LLVMMatchType<1>, llvm_i32_ty, LLVMMatchType<1>], - [IntrInaccessibleMemOnly], "llvm.ptr.annotation">; + [IntrInaccessibleMemOnly]>; def int_annotation : DefaultAttrsIntrinsic< [llvm_anyint_ty], [LLVMMatchType<0>, llvm_anyptr_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrInaccessibleMemOnly], "llvm.annotation">; + [IntrInaccessibleMemOnly]>; // Annotates the current program point with metadata strings which are emitted // as CodeView debug info records. This is expensive, as it disables inlining // and is modelled as having side effects. def int_codeview_annotation : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], - [IntrInaccessibleMemOnly, IntrNoDuplicate], - "llvm.codeview.annotation">; + [IntrInaccessibleMemOnly, IntrNoDuplicate]>; //===------------------------ Trampoline Intrinsics -----------------------===// // @@ -1887,8 +1886,7 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], // Intrinsic to detect whether its argument is a constant. def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], - [IntrNoMem, IntrConvergent], - "llvm.is.constant">; + [IntrNoMem, IntrConvergent]>; // Introduce a use of the argument without generating any code. def int_fake_use : DefaultAttrsIntrinsic<[], [llvm_vararg_ty], diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 3af1750ffcf3f..c9df6c43fd396 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -456,7 +456,7 @@ class WMMA_REGS { - string intr = "llvm.nvvm.wmma." + string intr_name = "llvm.nvvm.wmma." # Frag.geom # "." # Op # "." # Frag.frag @@ -467,7 +467,7 @@ class WMMA_NAME_LDST { // TODO(tra): record name should ideally use the same field order as the intrinsic. // E.g. string record = !subst("llvm", "int", // !subst(".", "_", llvm)); - string record = "int_nvvm_wmma_" + string record_name = "int_nvvm_wmma_" # Frag.geom # "_" # Op # "_" # Frag.frag @@ -496,7 +496,7 @@ class MMA_SIGNATURE { class WMMA_NAME { string signature = MMA_SIGNATURE.ret; - string record = "int_nvvm_wmma_" + string record_name = "int_nvvm_wmma_" # A.geom # "_mma" # !subst(".", "_", b1op) @@ -510,7 +510,7 @@ class WMMA_NAME { string signature = MMA_SIGNATURE.ret; - string record = "int_nvvm_mma" + string record_name = "int_nvvm_mma" # !subst(".", "_", b1op) # "_" # A.geom # "_" # ALayout @@ -524,7 +524,7 @@ class MMA_SP_NAME { string signature = MMA_SIGNATURE.ret; - string record = "int_nvvm_mma" + string record_name = "int_nvvm_mma" # "_" # !subst("::", "_", Metadata) # "_" # A.geom # "_row_col" @@ -533,26 +533,37 @@ class MMA_SP_NAME { + string record_name = !subst(".", "_", + !subst("llvm.", "int_", name)); + // Use explicit intrinsic name if it has an _ in it, else rely on LLVM + // assigned default name. + string intr_name = !if(!ne(!find(name, "_"), -1), name, ""); +} + class LDMATRIX_NAME { - string intr = "llvm.nvvm.ldmatrix.sync.aligned" + defvar name = "llvm.nvvm.ldmatrix.sync.aligned" # "." # Frag.geom # "." # Frag.frag # !if(Trans, ".trans", "") # "." # Frag.ptx_elt_type ; - string record = !subst(".", "_", - !subst("llvm.", "int_", intr)); + string intr_name = IntrinsicName.intr_name; + string record_name = IntrinsicName.record_name; } class STMATRIX_NAME { - string intr = "llvm.nvvm.stmatrix.sync.aligned" + defvar name = "llvm.nvvm.stmatrix.sync.aligned" # "." # Frag.geom # "." # Frag.frag # !if(Trans, ".trans", "") # "." # Frag.ptx_elt_type ; - string record = !subst(".", "_", - !subst("llvm.", "int_", intr)); + string intr_name = IntrinsicName.intr_name; + string record_name = IntrinsicName.record_name; } // Generates list of 4-tuples of WMMA_REGS representing a valid MMA op. @@ -1042,45 +1053,49 @@ class NVVM_TCGEN05_MMA_BASE { class NVVM_TCGEN05_MMA: NVVM_TCGEN05_MMA_BASE { - string intr = "llvm.nvvm.tcgen05.mma" + string name = "llvm.nvvm.tcgen05.mma" # !if(!eq(Sp, 1), ".sp", "") # "." # Space # !if(!eq(ScaleInputD, 1), ".scale_d", "") # !if(!eq(AShift, 1), ".ashift", ""); - string record = !subst(".", "_", !subst("llvm.", "int_", intr)); + string intr_name = IntrinsicName.intr_name; + string record_name = IntrinsicName.record_name; } class NVVM_TCGEN05_MMA_BLOCKSCALE: NVVM_TCGEN05_MMA_BASE { - string intr = "llvm.nvvm.tcgen05.mma" + string name = "llvm.nvvm.tcgen05.mma" # !if(!eq(Sp, 1), ".sp", "") # "." # Space # "." # Kind # ".block_scale" # ScaleVecSize; - string record = !subst(".", "_", !subst("llvm.", "int_", intr)); + string intr_name = IntrinsicName.intr_name; + string record_name = IntrinsicName.record_name; } class NVVM_TCGEN05_MMA_WS: NVVM_TCGEN05_MMA_BASE { - string intr = "llvm.nvvm.tcgen05.mma.ws" + string name = "llvm.nvvm.tcgen05.mma.ws" # !if(!eq(Sp, 1), ".sp", "") # "." # Space # !if(!eq(ZeroColMask, 1), ".zero_col_mask", ""); - string record = !subst(".", "_", !subst("llvm.", "int_", intr)); + string intr_name = IntrinsicName.intr_name; + string record_name = IntrinsicName.record_name; } class NVVM_TCGEN05_MMA_DISABLE_OUTPUT_LANE: NVVM_TCGEN05_MMA_BASE { - string intr = "llvm.nvvm.tcgen05.mma" + string name = "llvm.nvvm.tcgen05.mma" # !if(!eq(Sp, 1), ".sp", "") # "." # Space # !if(!eq(ScaleInputD, 1), ".scale_d", "") # ".disable_output_lane.cg" # CtaGroup # !if(!eq(AShift, 1), ".ashift", ""); - string record = !subst(".", "_", !subst("llvm.", "int_", intr)); + string intr_name = IntrinsicName.intr_name; + string record_name = IntrinsicName.record_name; } class NVVM_TCGEN05_MMA_BLOCKSCALE_SUPPORTED { @@ -2273,7 +2288,7 @@ class NVVM_WMMA_LD : Intrinsic>, NoCapture>], - WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>; + WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr_name>; // WMMA.STORE.D class NVVM_WMMA_ST @@ -2283,18 +2298,18 @@ class NVVM_WMMA_ST Frag.regs, !if(WithStride, [llvm_i32_ty], [])), [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>], - WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>; + WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr_name>; // Create all load/store variants foreach layout = ["row", "col"] in { foreach stride = [0, 1] in { foreach frag = NVVM_MMA_OPS.all_ld_ops in if NVVM_WMMA_LDST_SUPPORTED.ret then - def WMMA_NAME_LDST<"load", frag, layout, stride>.record + def WMMA_NAME_LDST<"load", frag, layout, stride>.record_name : NVVM_WMMA_LD; foreach frag = NVVM_MMA_OPS.all_st_ops in if NVVM_WMMA_LDST_SUPPORTED.ret then - def WMMA_NAME_LDST<"store", frag, layout, stride>.record + def WMMA_NAME_LDST<"store", frag, layout, stride>.record_name : NVVM_WMMA_ST; } } @@ -2313,7 +2328,7 @@ foreach layout_a = ["row", "col"] in { foreach b1op = NVVM_MMA_B1OPS.ret in { if NVVM_WMMA_SUPPORTED.ret then { def WMMA_NAME.record + op[0], op[1], op[2], op[3]>.record_name : NVVM_MMA; } } // b1op @@ -2330,7 +2345,7 @@ foreach layout_a = ["row", "col"] in { foreach b1op = NVVM_MMA_B1OPS.ret in { foreach kind = ["", "kind::f8f6f4"] in { if NVVM_MMA_SUPPORTED.ret then { - def MMA_NAME.record + def MMA_NAME.record_name : NVVM_MMA; } } // kind @@ -2379,7 +2394,7 @@ foreach metadata = ["sp", "sp::ordered_metadata"] in { foreach op = NVVM_MMA_OPS.all_mma_sp_ops in { if NVVM_MMA_SP_SUPPORTED.ret then { def MMA_SP_NAME.record + op[0], op[1], op[2], op[3]>.record_name : NVVM_MMA_SP; } } // op @@ -2392,12 +2407,12 @@ class NVVM_LDMATRIX : Intrinsic>, NoCapture>], - LDMATRIX_NAME.intr>; + LDMATRIX_NAME.intr_name>; foreach transposed = [0, 1] in { foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in { if NVVM_LDMATRIX_SUPPORTED.ret then { - def LDMATRIX_NAME.record + def LDMATRIX_NAME.record_name : NVVM_LDMATRIX; } } @@ -2409,12 +2424,12 @@ class NVVM_STMATRIX !listconcat([llvm_anyptr_ty], Frag.regs), [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly>, NoCapture>], - STMATRIX_NAME.intr>; + STMATRIX_NAME.intr_name>; foreach transposed = [0, 1] in { foreach frag = NVVM_MMA_OPS.all_stmatrix_ops in { if NVVM_STMATRIX_SUPPORTED.ret then { - def STMATRIX_NAME.record + def STMATRIX_NAME.record_name : NVVM_STMATRIX; } } @@ -2767,14 +2782,15 @@ foreach cta_group = ["cg1", "cg2"] in { "64x128b_warpx2_02_13", "64x128b_warpx2_01_23", "32x128b_warpx4"] in { - defvar intr_suffix = StrJoin<"_", [shape, src_fmt, cta_group]>.ret; - defvar name_suffix = StrJoin<".", [shape, src_fmt, cta_group]>.ret; + defvar name = "llvm.nvvm.tcgen05.cp." # + StrJoin<".", [shape, src_fmt, cta_group]>.ret; - def int_nvvm_tcgen05_cp_ # intr_suffix : Intrinsic<[], + defvar intrinsic_name = IntrinsicName; + def intrinsic_name.record_name : Intrinsic<[], [llvm_tmem_ptr_ty, // tmem_addr llvm_i64_ty], // smem descriptor [IntrConvergent, IntrInaccessibleMemOrArgMemOnly, NoCapture>], - "llvm.nvvm.tcgen05.cp." # name_suffix>; + intrinsic_name.intr_name>; } } } @@ -2881,9 +2897,9 @@ foreach sp = [0, 1] in { ] ); - def mma.record: + def mma.record_name: DefaultAttrsIntrinsicFlags<[], args, flags, intrinsic_properties, - mma.intr>; + mma.intr_name>; } } } @@ -2918,8 +2934,8 @@ foreach sp = [0, 1] in { Range, 0, !if(!eq(ashift, 1), 2, 4)>] ); - def mma.record: DefaultAttrsIntrinsicFlags<[], args, flags, intrinsic_properties, - mma.intr>; + def mma.record_name : DefaultAttrsIntrinsicFlags<[], args, flags, + intrinsic_properties, mma.intr_name>; } // ashift } // scale_d } // cta_group @@ -2944,11 +2960,11 @@ foreach sp = [0, 1] in { defvar collector_usage = ArgIndex; if NVVM_TCGEN05_MMA_BLOCKSCALE_SUPPORTED.ret then { - def mma.record: DefaultAttrsIntrinsicFlags<[], args, flags, + def mma.record_name : DefaultAttrsIntrinsicFlags<[], args, flags, !listconcat(mma.common_intr_props, [Range, Range]), - mma.intr>; + mma.intr_name>; } } } @@ -2977,9 +2993,9 @@ foreach sp = [0, 1] in { Range, 0, 4>] ); - def mma.record: + def mma.record_name: DefaultAttrsIntrinsicFlags<[], args, flags, intrinsic_properties, - mma.intr>; + mma.intr_name>; } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 8ed4062e43946..1b559a628be08 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -514,8 +514,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); - setOperationAction({ISD::ABS, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, - MVT::i32, Legal); + setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32, + Legal); setOperationAction( {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 09ef6ac7bcdf2..2aa54c920a046 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -45,9 +45,6 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom); - // 32-bit ABS is legal for AMDGPU except for R600 - setOperationAction(ISD::ABS, MVT::i32, Expand); - // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address // spaces, so it is custom lowered to handle those where it isn't. for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3a7e8e8f857e6..69cc38c35314c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -298,7 +298,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_CC, {MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64}, Expand); - setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal); + setOperationAction({ISD::ABS, ISD::UADDO, ISD::USUBO}, MVT::i32, Legal); setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY}, MVT::i32, Legal); diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 8e35cadb75857..36b99087e0a32 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1640,8 +1640,7 @@ void ARMAsmPrinter::EmitKCFI_CHECK_Thumb2(Register AddrReg, int64_t Type, bool isLast = (i == 3); // Verify the immediate can be encoded as Thumb2 modified immediate. - int T2SOImmVal = ARM_AM::getT2SOImmVal(imm); - assert(T2SOImmVal != -1 && + assert(ARM_AM::getT2SOImmVal(imm) != -1 && "Cannot encode immediate as Thumb2 modified immediate"); // eor[s] scratch, scratch, #imm (last one sets flags with CPSR) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 22cf3a7eef2c1..598735f5972bc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -4675,7 +4675,7 @@ class WMMA_INSTR _Args> // class WMMA_LOAD - : WMMA_INSTR.record, + : WMMA_INSTR.record_name, [!con((ins ADDR:$src), !if(WithStride, (ins B32:$ldm), (ins)))]>, Requires { @@ -4714,7 +4714,7 @@ class WMMA_LOAD // class WMMA_STORE_D - : WMMA_INSTR.record, + : WMMA_INSTR.record_name, [!con((ins ADDR:$dst), Frag.Ins, !if(WithStride, (ins B32:$ldm), (ins)))]>, @@ -4778,7 +4778,7 @@ class MMA_OP_PREDICATES { class WMMA_MMA - : WMMA_INSTR.record, + : WMMA_INSTR.record_name, [FragA.Ins, FragB.Ins, FragC.Ins]>, // Requires does not seem to have effect on Instruction w/o Patterns. // We set it here anyways and propagate to the Pat<> we construct below. @@ -4837,7 +4837,7 @@ defset list WMMAs = { class MMA - : WMMA_INSTR.record, + : WMMA_INSTR.record_name, [FragA.Ins, FragB.Ins, FragC.Ins]>, // Requires does not seem to have effect on Instruction w/o Patterns. // We set it here anyways and propagate to the Pat<> we construct below. @@ -4891,7 +4891,7 @@ class MMA_SP : WMMA_INSTR.record, + FragA, FragB, FragC, FragD>.record_name, [FragA.Ins, FragB.Ins, FragC.Ins, (ins B32:$metadata, i32imm:$selector)]>, // Requires does not seem to have effect on Instruction w/o Patterns. @@ -4946,7 +4946,7 @@ defset list MMA_SPs = { // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16 // class LDMATRIX - : WMMA_INSTR.record, [(ins ADDR:$src)]>, + : WMMA_INSTR.record_name, [(ins ADDR:$src)]>, Requires { // Build PatFrag that only matches particular address space. PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src), @@ -4981,7 +4981,7 @@ defset list LDMATRIXs = { // stmatrix.sync.aligned.m8n8[|.trans][|.shared].b16 // class STMATRIX - : WMMA_INSTR.record, [!con((ins ADDR:$dst), Frag.Ins)]>, + : WMMA_INSTR.record_name, [!con((ins ADDR:$dst), Frag.Ins)]>, Requires { // Build PatFrag that only matches particular address space. dag PFOperands = !con((ops node:$dst), @@ -5376,7 +5376,7 @@ class Tcgen05MMAInst { Intrinsic Intrin = !cast( - NVVM_TCGEN05_MMA.record + NVVM_TCGEN05_MMA.record_name ); dag ScaleInpIns = !if(!eq(ScaleInputD, 1), (ins i64imm:$scale_input_d), (ins)); @@ -5618,7 +5618,7 @@ class Tcgen05MMABlockScaleInst { Intrinsic Intrin = !cast( - NVVM_TCGEN05_MMA_BLOCKSCALE.record); + NVVM_TCGEN05_MMA_BLOCKSCALE.record_name); dag SparseMetadataIns = !if(!eq(Sp, 1), (ins B32:$spmetadata), (ins)); dag SparseMetadataIntr = !if(!eq(Sp, 1), (Intrin i32:$spmetadata), (Intrin)); @@ -5702,7 +5702,7 @@ class Tcgen05MMAWSInst { Intrinsic Intrin = !cast( - NVVM_TCGEN05_MMA_WS.record); + NVVM_TCGEN05_MMA_WS.record_name); dag ZeroColMaskIns = !if(!eq(HasZeroColMask, 1), (ins B64:$zero_col_mask), (ins)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 4104abd3b0219..4c2f7f6424f63 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -482,7 +482,7 @@ let Predicates = [HasVendorXSfvfwmaccqqq] in { defm SF_VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC; } -let Predicates = [HasVendorXSfvfnrclipxfqf] in { +let Predicates = [HasVendorXSfvfnrclipxfqf], AltFmtType = IS_NOT_ALTFMT in { defm SF_VFNRCLIP_XU_F_QF : VPseudoSiFiveVFNRCLIP; defm SF_VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP; } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 6909a282b3129..2dd0fde6b34d6 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -948,17 +948,17 @@ void llvm::updateVCallVisibilityInIndex( // linker, as we have no information on their eventual use. if (DynamicExportSymbols.count(P.first)) continue; + // With validation enabled, we want to exclude symbols visible to regular + // objects. Local symbols will be in this group due to the current + // implementation but those with VCallVisibilityTranslationUnit will have + // already been marked in clang so are unaffected. + if (VisibleToRegularObjSymbols.count(P.first)) + continue; for (auto &S : P.second.getSummaryList()) { auto *GVar = dyn_cast(S.get()); if (!GVar || GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) continue; - // With validation enabled, we want to exclude symbols visible to regular - // objects. Local symbols will be in this group due to the current - // implementation but those with VCallVisibilityTranslationUnit will have - // already been marked in clang so are unaffected. - if (VisibleToRegularObjSymbols.count(P.first)) - continue; GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); } } @@ -1408,9 +1408,8 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef TargetsForSlot, // If the summary list contains multiple summaries where at least one is // a local, give up, as we won't know which (possibly promoted) name to use. - for (const auto &S : TheFn.getSummaryList()) - if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1) - return false; + if (TheFn.hasLocal() && Size > 1) + return false; // Collect functions devirtualized at least for one call site for stats. if (PrintSummaryDevirt || AreStatisticsEnabled()) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9fab8b98b4af0..3e7b73a65fe07 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -270,6 +270,82 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_ptr: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movq (%rsp), %rax +; CHECK-O3-NEXT: popq %rcx +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_ptr: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movq (%rsp), %rax +; CHECK-SSE-O3-NEXT: popq %rcx +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_ptr: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: movq (%rsp), %rax +; CHECK-AVX-O3-NEXT: popq %rcx +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_ptr: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movq (%rsp), %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_ptr: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movq (%rsp), %rax +; CHECK-SSE-O0-NEXT: popq %rcx +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_ptr: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: movq (%rsp), %rax +; CHECK-AVX-O0-NEXT: popq %rcx +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-O3-LABEL: atomic_vec1_half: ; CHECK-O3: # %bb.0: @@ -386,3 +462,515 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_i64: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movq (%rsp), %rax +; CHECK-O3-NEXT: popq %rcx +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i64: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movq (%rsp), %rax +; CHECK-SSE-O3-NEXT: popq %rcx +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i64: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: movq (%rsp), %rax +; CHECK-AVX-O3-NEXT: popq %rcx +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i64: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movq (%rsp), %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i64: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movq (%rsp), %rax +; CHECK-SSE-O0-NEXT: popq %rcx +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i64: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: movq (%rsp), %rax +; CHECK-AVX-O0-NEXT: popq %rcx +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_double: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_double: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O3-NEXT: popq %rax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_double: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O3-NEXT: popq %rax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_double: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_double: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O0-NEXT: popq %rax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_double: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O0-NEXT: popq %rax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec2_i32: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_i32: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O3-NEXT: popq %rax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_i32: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O3-NEXT: popq %rax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_i32: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_i32: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O0-NEXT: popq %rax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_i32: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O0-NEXT: popq %rax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec4_float: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: subq $24, %rsp +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $16, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-O3-NEXT: addq $24, %rsp +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec4_float: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: subq $24, %rsp +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $16, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O3-NEXT: addq $24, %rsp +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_float: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: subq $24, %rsp +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $16, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: vmovaps (%rsp), %xmm0 +; CHECK-AVX-O3-NEXT: addq $24, %rsp +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec4_float: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: subq $24, %rsp +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $16, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-O0-NEXT: addq $24, %rsp +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec4_float: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: subq $24, %rsp +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $16, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O0-NEXT: addq $24, %rsp +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_float: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: subq $24, %rsp +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $16, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: vmovaps (%rsp), %xmm0 +; CHECK-AVX-O0-NEXT: addq $24, %rsp +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x float>, ptr %x acquire, align 4 + ret <4 x float> %ret +} + +define <8 x double> @atomic_vec8_double(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec8_double: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: subq $72, %rsp +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $64, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 +; CHECK-O3-NEXT: addq $72, %rsp +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec8_double: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: subq $72, %rsp +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $64, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 +; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 +; CHECK-SSE-O3-NEXT: addq $72, %rsp +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec8_double: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: subq $72, %rsp +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $64, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movapd (%rsp), %xmm0 +; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 +; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 +; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 +; CHECK-O0-NEXT: addq $72, %rsp +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec8_double: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: subq $72, %rsp +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $64, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movapd (%rsp), %xmm0 +; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 +; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 +; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 +; CHECK-SSE-O0-NEXT: addq $72, %rsp +; CHECK-SSE-O0-NEXT: retq + %ret = load atomic <8 x double>, ptr %x acquire, align 4 + ret <8 x double> %ret +} + +define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec16_bfloat: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: subq $40, %rsp +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $32, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-O3-NEXT: addq $40, %rsp +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: subq $40, %rsp +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $32, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-SSE-O3-NEXT: addq $40, %rsp +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec16_bfloat: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: subq $40, %rsp +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $32, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: vmovups (%rsp), %ymm0 +; CHECK-AVX-O3-NEXT: addq $40, %rsp +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec16_bfloat: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: subq $40, %rsp +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $32, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-O0-NEXT: addq $40, %rsp +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: subq $40, %rsp +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $32, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-SSE-O0-NEXT: addq $40, %rsp +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec16_bfloat: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: subq $40, %rsp +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $32, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: vmovups (%rsp), %ymm0 +; CHECK-AVX-O0-NEXT: addq $40, %rsp +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4 + ret <16 x bfloat> %ret +} + +define <32 x half> @atomic_vec32_half(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec32_half: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: subq $72, %rsp +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $64, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 +; CHECK-O3-NEXT: addq $72, %rsp +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec32_half: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: subq $72, %rsp +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $64, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 +; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 +; CHECK-SSE-O3-NEXT: addq $72, %rsp +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec32_half: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: subq $72, %rsp +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $64, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 +; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 +; CHECK-O0-NEXT: addq $72, %rsp +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec32_half: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: subq $72, %rsp +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $64, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 +; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3 +; CHECK-SSE-O0-NEXT: addq $72, %rsp +; CHECK-SSE-O0-NEXT: retq + %ret = load atomic <32 x half>, ptr %x acquire, align 4 + ret <32 x half> %ret +} diff --git a/llvm/test/TableGen/intrinsic-manual-name.td b/llvm/test/TableGen/intrinsic-manual-name.td new file mode 100644 index 0000000000000..5751fc2874b97 --- /dev/null +++ b/llvm/test/TableGen/intrinsic-manual-name.td @@ -0,0 +1,6 @@ +// RUN: llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS 2>&1 | FileCheck %s -DFILE=%s + +include "llvm/IR/Intrinsics.td" + +// CHECK: [[FILE]]:[[@LINE+1]]:5: note: Explicitly specified name matches default name, consider dropping it +def int_foo0 : Intrinsic<[llvm_anyint_ty], [], [], "llvm.foo0">; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll new file mode 100644 index 0000000000000..99e2be95d8f36 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll @@ -0,0 +1,260 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2IC1 %s +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF2IC2 %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx" + +define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr noalias %dst.0, ptr noalias %dst.1, i1 %c) { +; VF2IC1-LABEL: define void @load_store_interleave_group_block_invar_cond( +; VF2IC1-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[DST_0:%.*]], ptr noalias [[DST_1:%.*]], i1 [[C:%.*]]) { +; VF2IC1-NEXT: [[ENTRY:.*:]] +; VF2IC1-NEXT: br label %[[VECTOR_PH:.*]] +; VF2IC1: [[VECTOR_PH]]: +; VF2IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC1: [[VECTOR_BODY]]: +; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; VF2IC1-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]] +; VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; VF2IC1-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8 +; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF2IC1: [[PRED_STORE_IF]]: +; VF2IC1-NEXT: store i8 1, ptr [[DST_0]], align 1 +; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF2IC1: [[PRED_STORE_CONTINUE]]: +; VF2IC1-NEXT: br i1 [[C]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; VF2IC1: [[PRED_STORE_IF1]]: +; VF2IC1-NEXT: store i8 1, ptr [[DST_0]], align 1 +; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; VF2IC1: [[PRED_STORE_CONTINUE2]]: +; VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]] +; VF2IC1-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1 +; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2IC1: [[MIDDLE_BLOCK]]: +; VF2IC1-NEXT: br label %[[EXIT:.*]] +; VF2IC1: [[EXIT]]: +; VF2IC1-NEXT: ret void +; +; VF2IC2-LABEL: define void @load_store_interleave_group_block_invar_cond( +; VF2IC2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[DST_0:%.*]], ptr noalias [[DST_1:%.*]], i1 [[C:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] +; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP0]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP2]] +; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 +; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 +; VF2IC2-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP3]], align 8 +; VF2IC2-NEXT: store <2 x i64> [[WIDE_LOAD1]], ptr [[TMP4]], align 8 +; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF2IC2: [[PRED_STORE_IF]]: +; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF2IC2: [[PRED_STORE_CONTINUE]]: +; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; VF2IC2: [[PRED_STORE_IF2]]: +; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]] +; VF2IC2: [[PRED_STORE_CONTINUE3]]: +; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; VF2IC2: [[PRED_STORE_IF4]]: +; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]] +; VF2IC2: [[PRED_STORE_CONTINUE5]]: +; VF2IC2-NEXT: br i1 [[C]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] +; VF2IC2: [[PRED_STORE_IF6]]: +; VF2IC2-NEXT: store i8 1, ptr [[DST_0]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; VF2IC2: [[PRED_STORE_CONTINUE7]]: +; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]] +; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 2 +; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1 +; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP6]], align 1 +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2IC2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: br label %[[EXIT:.*]] +; VF2IC2: [[EXIT]]: +; VF2IC2-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %mul.2 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %mul.2 + %l.0 = load i64, ptr %data.0, align 8 + store i64 %l.0, ptr %data.0, align 8 + %add.1 = or disjoint i64 %mul.2, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %add.1 + %l.1 = load i64, ptr %data.1, align 8 + store i64 %l.1, ptr %data.1, align 8 + br i1 %c, label %then, label %loop.latch + +then: + store i8 1, ptr %dst.0 + br label %loop.latch + +loop.latch: + %gep.dst.1 = getelementptr inbounds i8, ptr %dst.1, i64 %iv + store i8 0, ptr %gep.dst.1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %masks, ptr noalias %dst) { +; VF2IC1-LABEL: define void @load_store_interleave_group_block_var_cond( +; VF2IC1-SAME: ptr noalias [[DATA:%.*]], ptr [[MASKS:%.*]], ptr noalias [[DST:%.*]]) { +; VF2IC1-NEXT: [[ENTRY:.*:]] +; VF2IC1-NEXT: br label %[[VECTOR_PH:.*]] +; VF2IC1: [[VECTOR_PH]]: +; VF2IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC1: [[VECTOR_BODY]]: +; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ] +; VF2IC1-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]] +; VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; VF2IC1-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP1]], align 8 +; VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[INDEX]] +; VF2IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 +; VF2IC1-NEXT: [[TMP3:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD1]], zeroinitializer +; VF2IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; VF2IC1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF2IC1: [[PRED_STORE_IF]]: +; VF2IC1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; VF2IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP5]] +; VF2IC1-NEXT: store i8 1, ptr [[TMP6]], align 1 +; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF2IC1: [[PRED_STORE_CONTINUE]]: +; VF2IC1-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; VF2IC1-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]] +; VF2IC1: [[PRED_STORE_IF2]]: +; VF2IC1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP8]] +; VF2IC1-NEXT: store i8 1, ptr [[TMP9]], align 1 +; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE3]] +; VF2IC1: [[PRED_STORE_CONTINUE3]]: +; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2IC1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2IC1: [[MIDDLE_BLOCK]]: +; VF2IC1-NEXT: br label %[[EXIT:.*]] +; VF2IC1: [[EXIT]]: +; VF2IC1-NEXT: ret void +; +; VF2IC2-LABEL: define void @load_store_interleave_group_block_var_cond( +; VF2IC2-SAME: ptr noalias [[DATA:%.*]], ptr [[MASKS:%.*]], ptr noalias [[DST:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] +; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP0]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP2]] +; VF2IC2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 +; VF2IC2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2IC2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> +; VF2IC2-NEXT: [[WIDE_VEC2:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; VF2IC2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> +; VF2IC2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x i64> [[WIDE_VEC2]], <4 x i64> poison, <2 x i32> +; VF2IC2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC]], <2 x i64> [[STRIDED_VEC1]], <4 x i32> +; VF2IC2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> +; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF2IC2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[STRIDED_VEC3]], <2 x i64> [[STRIDED_VEC4]], <4 x i32> +; VF2IC2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> +; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC5]], ptr [[TMP4]], align 8 +; VF2IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[INDEX]] +; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 2 +; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1 +; VF2IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP8]], align 1 +; VF2IC2-NEXT: [[TMP9:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], zeroinitializer +; VF2IC2-NEXT: [[TMP10:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], zeroinitializer +; VF2IC2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; VF2IC2-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF2IC2: [[PRED_STORE_IF]]: +; VF2IC2-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; VF2IC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP12]] +; VF2IC2-NEXT: store i8 1, ptr [[TMP13]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF2IC2: [[PRED_STORE_CONTINUE]]: +; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 +; VF2IC2-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; VF2IC2: [[PRED_STORE_IF7]]: +; VF2IC2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP15]] +; VF2IC2-NEXT: store i8 1, ptr [[TMP16]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; VF2IC2: [[PRED_STORE_CONTINUE8]]: +; VF2IC2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; VF2IC2-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; VF2IC2: [[PRED_STORE_IF9]]: +; VF2IC2-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP18]] +; VF2IC2-NEXT: store i8 1, ptr [[TMP19]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; VF2IC2: [[PRED_STORE_CONTINUE10]]: +; VF2IC2-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; VF2IC2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]] +; VF2IC2: [[PRED_STORE_IF11]]: +; VF2IC2-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP21]] +; VF2IC2-NEXT: store i8 1, ptr [[TMP22]], align 1 +; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; VF2IC2: [[PRED_STORE_CONTINUE12]]: +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF2IC2-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2IC2-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: br label %[[EXIT:.*]] +; VF2IC2: [[EXIT]]: +; VF2IC2-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %mul.2 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %mul.2 + %l.0 = load i64, ptr %data.0, align 8 + store i64 %l.0, ptr %data.0, align 8 + %add.1 = or disjoint i64 %mul.2, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %add.1 + %l.1 = load i64, ptr %data.1, align 8 + store i64 %l.1, ptr %data.1, align 8 + %gep.mask = getelementptr inbounds i8, ptr %masks, i64 %iv + %l.mask = load i8, ptr %gep.mask + %c = icmp eq i8 %l.mask, 0 + br i1 %c, label %then, label %loop.latch + +then: + store i8 1, ptr %gep.mask + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} diff --git a/llvm/unittests/ADT/SmallVectorTest.cpp b/llvm/unittests/ADT/SmallVectorTest.cpp index b216359ffd31c..1a01f30e8dd35 100644 --- a/llvm/unittests/ADT/SmallVectorTest.cpp +++ b/llvm/unittests/ADT/SmallVectorTest.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Compiler.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" #include #include @@ -1156,6 +1157,17 @@ TEST(SmallVectorTest, InitializerList) { EXPECT_TRUE(ArrayRef(V2).equals({4, 5, 3, 2})); } +namespace namespace_with_adl { +struct MyVector { + std::vector data; +}; + +std::vector::const_iterator begin(const MyVector &V) { + return V.data.begin(); +} +std::vector::const_iterator end(const MyVector &V) { return V.data.end(); } +} // namespace namespace_with_adl + TEST(SmallVectorTest, ToVector) { { std::vector v = {'a', 'b', 'c'}; @@ -1173,6 +1185,15 @@ TEST(SmallVectorTest, ToVector) { for (size_t I = 0; I < v.size(); ++I) EXPECT_EQ(v[I], Vector[I]); } + { + // Check that to_vector and to_vector_of work with types that require ADL + // for being/end iterators. + namespace_with_adl::MyVector V = {{1, 2, 3}}; + auto IntVector = to_vector(V); + EXPECT_THAT(IntVector, testing::ElementsAre(1, 2, 3)); + IntVector = to_vector<3>(V); + EXPECT_THAT(IntVector, testing::ElementsAre(1, 2, 3)); + } } struct To { @@ -1231,6 +1252,15 @@ TEST(SmallVectorTest, ToVectorOf) { for (size_t I = 0; I < StdVector.size(); ++I) EXPECT_EQ(StdVector[I], Vector[I]); } + { + // Check that to_vector works with types that require ADL for being/end + // iterators. + namespace_with_adl::MyVector V = {{1, 2, 3}}; + auto UnsignedVector = to_vector_of(V); + EXPECT_THAT(UnsignedVector, testing::ElementsAre(1u, 2u, 3u)); + UnsignedVector = to_vector_of(V); + EXPECT_THAT(UnsignedVector, testing::ElementsAre(1u, 2u, 3u)); + } } template diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp index be7537c83da3a..cd866469792a2 100644 --- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp +++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp @@ -278,15 +278,21 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R, TargetPrefix = R->getValueAsString("TargetPrefix"); Name = R->getValueAsString("LLVMName").str(); + std::string DefaultName = "llvm." + EnumName.str(); + llvm::replace(DefaultName, '_', '.'); + if (Name == "") { // If an explicit name isn't specified, derive one from the DefName. - Name = "llvm." + EnumName.str(); - llvm::replace(Name, '_', '.'); + Name = std::move(DefaultName); } else { // Verify it starts with "llvm.". if (!StringRef(Name).starts_with("llvm.")) PrintFatalError(DefLoc, "Intrinsic '" + DefName + "'s name does not start with 'llvm.'!"); + + if (Name == DefaultName) + PrintNote(DefLoc, "Explicitly specified name matches default name, " + "consider dropping it"); } // If TargetPrefix is specified, make sure that Name starts with diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 39dacf7586233..444670212cafb 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -122,6 +122,7 @@ static_library("CodeGen") { "MLRegAllocPriorityAdvisor.cpp", "MachineBasicBlock.cpp", "MachineBlockFrequencyInfo.cpp", + "MachineBlockHashInfo.cpp", "MachineBlockPlacement.cpp", "MachineBranchProbabilityInfo.cpp", "MachineCFGPrinter.cpp", diff --git a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py index 80ac4c61c7871..dba9e2c137586 100644 --- a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py +++ b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py @@ -1,14 +1,19 @@ # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""IR2Vec Triplet Generator +"""IR2Vec/MIR2Vec Triplet Generator -Generates IR2Vec triplets by applying random optimization levels to LLVM IR files -and extracting triplets using llvm-ir2vec. Automatically generates preprocessed -files: entity2id.txt, relation2id.txt, and train2id.txt. +Generates IR2Vec or MIR2Vec triplets by applying random optimization levels to +LLVM IR files (or processing MIR files) and extracting triplets using llvm-ir2vec. +Automatically generates preprocessed files (entity2id.txt, relation2id.txt, and +train2id.txt) necessary for training IR2Vec or MIR2Vec vocabularies. Usage: - python generateTriplets.py + For LLVM IR: + python generateTriplets.py + + For Machine IR: + python generateTriplets.py --mode=mir """ import argparse @@ -41,7 +46,7 @@ def __init__(self, triplets: Set[str], max_relation: int): class IR2VecTripletGenerator: - """Main class for generating IR2Vec triplets""" + """Main class for generating IR2Vec or MIR2Vec triplets""" def __init__( self, @@ -49,11 +54,13 @@ def __init__( num_optimizations: int, output_dir: Path, max_workers: int = DEFAULT_MAX_WORKERS, + mode: str = "llvm", ): self.llvm_build_dir = llvm_build_dir self.num_optimizations = num_optimizations self.output_dir = output_dir self.max_workers = max_workers + self.mode = mode # "llvm" or "mir" # Tool paths self.opt_binary = os.path.join(llvm_build_dir, "bin", "opt") @@ -85,7 +92,11 @@ def _validate_setup(self): f"llvm-ir2vec binary not found or not executable: {self.ir2vec_binary}" ) - if not (1 <= self.num_optimizations <= len(OPT_LEVELS)): + if self.mode not in ["llvm", "mir"]: + raise ValueError(f"Mode must be 'llvm' or 'mir', got: {self.mode}") + + # For LLVM IR mode, validate optimization count + if self.mode == "llvm" and not (1 <= self.num_optimizations <= len(OPT_LEVELS)): raise ValueError( f"Number of optimizations must be between 1-{len(OPT_LEVELS)}" ) @@ -95,19 +106,28 @@ def _select_optimization_levels(self) -> List[str]: return random.sample(OPT_LEVELS, self.num_optimizations) def _process_single_file(self, input_file: Path) -> TripletResult: - """Process a single LLVM IR file with multiple optimization levels""" + """Process a single LLVM IR or MIR file""" all_triplets = set() max_relation = 1 - opt_levels = self._select_optimization_levels() - for opt_level in opt_levels: - triplets, file_max_relation = self._run_pipeline(input_file, opt_level) + if self.mode == "mir": + # For MIR files, process directly without optimization + triplets, file_max_relation = self._run_mir_pipeline(input_file) if triplets: all_triplets.update(triplets) max_relation = max(max_relation, file_max_relation) - logger.debug( - f"Generated {len(triplets)} triplets for {input_file} with {opt_level}" - ) + logger.debug(f"Generated {len(triplets)} triplets for {input_file}") + else: + # For LLVM IR files, apply multiple optimization levels + opt_levels = self._select_optimization_levels() + for opt_level in opt_levels: + triplets, file_max_relation = self._run_pipeline(input_file, opt_level) + if triplets: + all_triplets.update(triplets) + max_relation = max(max_relation, file_max_relation) + logger.debug( + f"Generated {len(triplets)} triplets for {input_file} with {opt_level}" + ) return TripletResult(all_triplets, max_relation) @@ -124,7 +144,7 @@ def _run_pipeline(self, input_file: Path, opt_level: str) -> Tuple[Set[str], int # Run llvm-ir2vec with opt's output as input ir2vec_proc = subprocess.Popen( - [self.ir2vec_binary, "triplets", "-", "-o", "-"], + [self.ir2vec_binary, "triplets", "--mode=llvm", "-", "-o", "-"], stdin=opt_proc.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -143,6 +163,32 @@ def _run_pipeline(self, input_file: Path, opt_level: str) -> Tuple[Set[str], int except (subprocess.SubprocessError, OSError): return set(), 1 + def _run_mir_pipeline(self, input_file: Path) -> Tuple[Set[str], int]: + """Run llvm-ir2vec pipeline for MIR files.""" + try: + # Run llvm-ir2vec directly on MIR file + result = subprocess.run( + [ + self.ir2vec_binary, + "triplets", + "--mode=mir", + str(input_file), + "-o", + "-", + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + + if result.returncode != 0: + return set(), 1 + + return self._parse_triplet_output(result.stdout) + except (subprocess.SubprocessError, OSError): + return set(), 1 + def _parse_triplet_output(self, output: str) -> Tuple[Set[str], int]: """Parse triplet output and extract max relation""" if not output.strip(): @@ -160,12 +206,21 @@ def _parse_triplet_output(self, output: str) -> Tuple[Set[str], int]: return set(lines), max_relation def generate_triplets(self, file_list: Path) -> None: - """Main method to generate triplets from a list of LLVM IR files""" + """Main method to generate triplets from a list of LLVM IR or MIR files""" + # Store file_list_path for later use in entity generation + self.file_list_path = file_list + input_files = self._read_file_list(file_list) - logger.info( - f"Processing {len(input_files)} files with {self.num_optimizations} " - f"optimization levels using {self.max_workers} workers" - ) + + if self.mode == "mir": + logger.info( + f"Processing {len(input_files)} MIR files using {self.max_workers} workers" + ) + else: + logger.info( + f"Processing {len(input_files)} files with {self.num_optimizations} " + f"optimization levels using {self.max_workers} workers" + ) all_triplets = set() global_max_relation = 1 @@ -222,28 +277,60 @@ def _generate_output_files(self, all_triplets: Set[str], max_relation: int) -> N def _generate_entity2id(self, output_file: Path) -> None: """Generate entity2id.txt using llvm-ir2vec""" - subprocess.run( - [str(self.ir2vec_binary), "entities", "-o", str(output_file)], - check=True, - capture_output=True, - ) + if self.mode == "mir": + # For MIR mode, we need to provide a sample MIR file to determine target + # Use the first file from the processed list + input_files = self._read_file_list(self.file_list_path) + if not input_files: + raise ValueError("No input files available for entity generation") + + subprocess.run( + [ + str(self.ir2vec_binary), + "entities", + "--mode=mir", + str(input_files[0]), + "-o", + str(output_file), + ], + check=True, + capture_output=True, + ) + else: + subprocess.run( + [ + str(self.ir2vec_binary), + "entities", + "--mode=llvm", + "-o", + str(output_file), + ], + check=True, + capture_output=True, + ) def _generate_relation2id(self, output_file: Path, max_relation: int) -> None: """Generate relation2id.txt from max relation""" - max_relation = max(max_relation, 1) # At least Type and Next relations + max_relation = max(max_relation, 1) # At least Next relation num_relations = max_relation + 1 with open(output_file, "w") as f: f.write(f"{num_relations}\n") - f.write("Type\t0\n") - f.write("Next\t1\n") - f.writelines(f"Arg{i-2}\t{i}\n" for i in range(2, num_relations)) + if self.mode == "llvm": + # LLVM IR has Type relation at 0 + f.write("Type\t0\n") + f.write("Next\t1\n") + f.writelines(f"Arg{i-2}\t{i}\n" for i in range(2, num_relations)) + else: + # MIR doesn't have Type relation, starts with Next at 0 + f.write("Next\t0\n") + f.writelines(f"Arg{i-1}\t{i}\n" for i in range(1, num_relations)) def main(): """Main entry point""" parser = argparse.ArgumentParser( - description="Generate IR2Vec triplets from LLVM IR files", + description="Generate IR2Vec or MIR2Vec triplets from LLVM IR or Machine IR files", formatter_class=argparse.RawDescriptionHelpFormatter, ) @@ -253,16 +340,25 @@ def main(): parser.add_argument( "num_optimizations", type=int, - help="Number of optimization levels to apply (1-6)", + nargs="?", + default=1, + help="Number of optimization levels to apply (1-6) for LLVM IR mode", ) parser.add_argument( - "ll_file_list", + "input_file_list", type=Path, - help="File containing list of LLVM IR files to process", + help="File containing list of LLVM IR or MIR files to process", ) parser.add_argument( "output_dir", type=Path, help="Output directory for generated files" ) + parser.add_argument( + "--mode", + type=str, + choices=["llvm", "mir"], + default="llvm", + help="Operation mode: 'llvm' for LLVM IR (default) or 'mir' for Machine IR", + ) parser.add_argument( "-j", "--max-workers", @@ -296,8 +392,9 @@ def main(): args.num_optimizations, args.output_dir, args.max_workers, + args.mode, ) - generator.generate_triplets(args.ll_file_list) + generator.generate_triplets(args.input_file_list) if __name__ == "__main__": diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h index 2f70f24dd3ef2..ec349ec48e33b 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -106,7 +106,6 @@ std::unique_ptr> createLoopTilingPass(); /// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor). std::unique_ptr> createLoopUnrollPass( int unrollFactor = -1, bool unrollUpToFactor = false, - bool unrollFull = false, const std::function &getUnrollFactor = nullptr); /// Creates a loop unroll jam pass to unroll jam by the specified factor. A diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td index 6ad45b828f657..bb6b41c0bba35 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -203,12 +203,10 @@ def AffineLoopUnroll : InterfacePass<"affine-loop-unroll", "FunctionOpInterface" let summary = "Unroll affine loops"; let constructor = "mlir::affine::createLoopUnrollPass()"; let options = [ - Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4", + Option<"unrollFactor", "unroll-factor", "int64_t", /*default=*/"4", "Use this unroll factor for all loops being unrolled">, Option<"unrollUpToFactor", "unroll-up-to-factor", "bool", /*default=*/"false", "Allow unrolling up to the factor specified">, - Option<"unrollFull", "unroll-full", "bool", /*default=*/"false", - "Fully unroll loops">, Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1", "Unroll innermost loops repeatedly this many times">, Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned", diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp index 316721b2ecd78..60ae78b4133a4 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp @@ -45,18 +45,15 @@ struct LoopUnroll : public affine::impl::AffineLoopUnrollBase { const std::function getUnrollFactor; LoopUnroll() : getUnrollFactor(nullptr) {} - LoopUnroll(const LoopUnroll &other) - - = default; + LoopUnroll(const LoopUnroll &other) = default; explicit LoopUnroll( std::optional unrollFactor = std::nullopt, - bool unrollUpToFactor = false, bool unrollFull = false, + bool unrollUpToFactor = false, const std::function &getUnrollFactor = nullptr) : getUnrollFactor(getUnrollFactor) { if (unrollFactor) this->unrollFactor = *unrollFactor; this->unrollUpToFactor = unrollUpToFactor; - this->unrollFull = unrollFull; } void runOnOperation() override; @@ -85,11 +82,17 @@ static void gatherInnermostLoops(FunctionOpInterface f, } void LoopUnroll::runOnOperation() { + if (!(unrollFactor.getValue() > 0 || unrollFactor.getValue() == -1)) { + emitError(UnknownLoc::get(&getContext()), + "Invalid option: 'unroll-factor' should be greater than 0 or " + "equal to -1"); + return signalPassFailure(); + } FunctionOpInterface func = getOperation(); if (func.isExternal()) return; - if (unrollFull && unrollFullThreshold.hasValue()) { + if (unrollFactor.getValue() == -1 && unrollFullThreshold.hasValue()) { // Store short loops as we walk. SmallVector loops; @@ -130,7 +133,7 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { return loopUnrollByFactor(forOp, getUnrollFactor(forOp), /*annotateFn=*/nullptr, cleanUpUnroll); // Unroll completely if full loop unroll was specified. - if (unrollFull) + if (unrollFactor.getValue() == -1) return loopUnrollFull(forOp); // Otherwise, unroll by the given unroll factor. if (unrollUpToFactor) @@ -141,9 +144,9 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { std::unique_ptr> mlir::affine::createLoopUnrollPass( - int unrollFactor, bool unrollUpToFactor, bool unrollFull, + int unrollFactor, bool unrollUpToFactor, const std::function &getUnrollFactor) { return std::make_unique( unrollFactor == -1 ? std::nullopt : std::optional(unrollFactor), - unrollUpToFactor, unrollFull, getUnrollFactor); + unrollUpToFactor, getUnrollFactor); } diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp index b56e7788625f5..b88fbaa9018f6 100644 --- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp @@ -260,9 +260,9 @@ static std::string getDecorationName(StringRef attrName) { } template -LogicalResult processDecorationList(Location loc, Decoration decoration, - Attribute attrList, StringRef attrName, - EmitF emitter) { +static LogicalResult processDecorationList(Location loc, Decoration decoration, + Attribute attrList, + StringRef attrName, EmitF emitter) { auto arrayAttr = dyn_cast(attrList); if (!arrayAttr) { return emitError(loc, "expecting array attribute of ") diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir index 574e9f41494af..efdceed7c9a25 100644 --- a/mlir/test/Dialect/Affine/unroll.mlir +++ b/mlir/test/Dialect/Affine/unroll.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}))" | FileCheck %s --check-prefix UNROLL-FULL -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}))" | FileCheck %s --check-prefix SHORT +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=-1}))" | FileCheck %s --check-prefix UNROLL-FULL +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=-1 unroll-full-threshold=2}))" | FileCheck %s --check-prefix SHORT // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}))" | FileCheck %s --check-prefix UNROLL-BY-4 // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}))" | FileCheck %s --check-prefix UNROLL-BY-1 // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix GPU-UNROLL-FULL +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(gpu.module(gpu.func(affine-loop-unroll{unroll-factor=-1})))" | FileCheck %s --check-prefix GPU-UNROLL-FULL // UNROLL-FULL-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)> // UNROLL-FULL-DAG: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)> diff --git a/mlir/test/Transforms/scf-loop-unroll.mlir b/mlir/test/Transforms/scf-loop-unroll.mlir index 0ef6ad15d4eb0..db96c659c49fb 100644 --- a/mlir/test/Transforms/scf-loop-unroll.mlir +++ b/mlir/test/Transforms/scf-loop-unroll.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s --test-loop-unrolling="unroll-factor=3" -split-input-file -canonicalize | FileCheck %s // RUN: mlir-opt %s --test-loop-unrolling="unroll-factor=1" -split-input-file -canonicalize | FileCheck %s --check-prefix UNROLL-BY-1 -// RUN: mlir-opt %s --test-loop-unrolling="unroll-full=true" -split-input-file -canonicalize | FileCheck %s --check-prefix UNROLL-FULL +// RUN: mlir-opt %s --test-loop-unrolling="unroll-factor=-1" -split-input-file -canonicalize | FileCheck %s --check-prefix UNROLL-FULL // CHECK-LABEL: scf_loop_unroll_single func.func @scf_loop_unroll_single(%arg0 : f32, %arg1 : f32) -> f32 { diff --git a/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp b/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp index ced003305a7b8..2470380682318 100644 --- a/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp +++ b/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp @@ -42,11 +42,10 @@ struct TestLoopUnrollingPass TestLoopUnrollingPass(const TestLoopUnrollingPass &) {} explicit TestLoopUnrollingPass(uint64_t unrollFactorParam, unsigned loopDepthParam, - bool annotateLoopParam, bool unrollFullParam) { + bool annotateLoopParam) { unrollFactor = unrollFactorParam; loopDepth = loopDepthParam; annotateLoop = annotateLoopParam; - unrollFull = unrollFactorParam; } void getDependentDialects(DialectRegistry ®istry) const override { @@ -54,6 +53,12 @@ struct TestLoopUnrollingPass } void runOnOperation() override { + if (!(unrollFactor.getValue() > 0 || unrollFactor.getValue() == -1)) { + emitError(UnknownLoc::get(&getContext()), + "Invalid option: 'unroll-factor' should be greater than 0 or " + "equal to -1"); + return signalPassFailure(); + } SmallVector loops; getOperation()->walk([&](scf::ForOp forOp) { if (getNestingDepth(forOp) == loopDepth) @@ -65,15 +70,15 @@ struct TestLoopUnrollingPass } }; for (auto loop : loops) { - if (unrollFull) + if (unrollFactor.getValue() == -1) (void)loopUnrollFull(loop); else (void)loopUnrollByFactor(loop, unrollFactor, annotateFn); } } - Option unrollFactor{*this, "unroll-factor", - llvm::cl::desc("Loop unroll factor."), - llvm::cl::init(1)}; + Option unrollFactor{*this, "unroll-factor", + llvm::cl::desc("Loop unroll factor."), + llvm::cl::init(1)}; Option annotateLoop{*this, "annotate", llvm::cl::desc("Annotate unrolled iterations."), llvm::cl::init(false)}; @@ -82,9 +87,6 @@ struct TestLoopUnrollingPass llvm::cl::init(false)}; Option loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."), llvm::cl::init(0)}; - Option unrollFull{*this, "unroll-full", - llvm::cl::desc("Full unroll loops."), - llvm::cl::init(false)}; }; } // namespace