diff --git a/amd/comgr/src/comgr-compiler.cpp b/amd/comgr/src/comgr-compiler.cpp index 9e53c75e02342..822e4b3ce512f 100644 --- a/amd/comgr/src/comgr-compiler.cpp +++ b/amd/comgr/src/comgr-compiler.cpp @@ -27,12 +27,12 @@ #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Job.h" #include "clang/Driver/OffloadBundler.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/FrontendTool/Utils.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Constants.h" @@ -79,7 +79,7 @@ using namespace llvm::opt; using namespace llvm::sys; using namespace clang; using namespace clang::driver; -using namespace clang::driver::options; +using namespace clang::options; using namespace COMGR::TimeStatistics; namespace COMGR { diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index fb3f05329be21..d7ec853af862f 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -165,6 +165,7 @@ clang_target_link_libraries(clangDaemon clangBasic clangDependencyScanning clangDriver + clangOptions clangFormat clangFrontend clangIndex diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index c1be93730129a..7990f2719e9a0 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -11,8 +11,8 @@ #include "support/Logger.h" #include "support/Trace.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInvocation.h" +#include "clang/Options/Options.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/ArrayRef.h" @@ -206,7 +206,7 @@ void 
CommandMangler::operator()(tooling::CompileCommand &Command, if (Cmd.empty()) return; - auto &OptTable = clang::driver::getDriverOptTable(); + auto &OptTable = getDriverOptTable(); // OriginalArgs needs to outlive ArgList. llvm::SmallVector OriginalArgs; OriginalArgs.reserve(Cmd.size()); @@ -222,8 +222,8 @@ void CommandMangler::operator()(tooling::CompileCommand &Command, llvm::opt::InputArgList ArgList; ArgList = OptTable.ParseArgs( llvm::ArrayRef(OriginalArgs).drop_front(), IgnoredCount, IgnoredCount, - llvm::opt::Visibility(IsCLMode ? driver::options::CLOption - : driver::options::ClangOption)); + llvm::opt::Visibility(IsCLMode ? options::CLOption + : options::ClangOption)); llvm::SmallVector IndicesToDrop; // Having multiple architecture options (e.g. when building fat binaries) @@ -232,7 +232,7 @@ void CommandMangler::operator()(tooling::CompileCommand &Command, // As there are no signals to figure out which one user actually wants. They // can explicitly specify one through `CompileFlags.Add` if need be. unsigned ArchOptCount = 0; - for (auto *Input : ArgList.filtered(driver::options::OPT_arch)) { + for (auto *Input : ArgList.filtered(options::OPT_arch)) { ++ArchOptCount; for (auto I = 0U; I <= Input->getNumValues(); ++I) IndicesToDrop.push_back(Input->getIndex() + I); @@ -262,13 +262,12 @@ void CommandMangler::operator()(tooling::CompileCommand &Command, // explicitly at the end of the flags. This ensures modifications done in the // following steps apply in more cases (like setting -x, which only affects // inputs that come after it). - for (auto *Input : ArgList.filtered(driver::options::OPT_INPUT)) { + for (auto *Input : ArgList.filtered(options::OPT_INPUT)) { SawInput(Input->getValue(0)); IndicesToDrop.push_back(Input->getIndex()); } // Anything after `--` is also treated as input, drop them as well. 
- if (auto *DashDash = - ArgList.getLastArgNoClaim(driver::options::OPT__DASH_DASH)) { + if (auto *DashDash = ArgList.getLastArgNoClaim(options::OPT__DASH_DASH)) { auto DashDashIndex = DashDash->getIndex() + 1; // +1 accounts for Cmd[0] // Another +1 so we don't treat the `--` itself as an input. for (unsigned I = DashDashIndex + 1; I < Cmd.size(); ++I) @@ -424,11 +423,11 @@ DriverMode getDriverMode(const std::vector &Args) { // Returns the set of DriverModes where an option may be used. unsigned char getModes(const llvm::opt::Option &Opt) { unsigned char Result = DM_None; - if (Opt.hasVisibilityFlag(driver::options::ClangOption)) + if (Opt.hasVisibilityFlag(options::ClangOption)) Result |= DM_GCC; - if (Opt.hasVisibilityFlag(driver::options::CC1Option)) + if (Opt.hasVisibilityFlag(options::CC1Option)) Result |= DM_CC1; - if (Opt.hasVisibilityFlag(driver::options::CLOption)) + if (Opt.hasVisibilityFlag(options::CLOption)) Result |= DM_CL; return Result; } @@ -442,8 +441,8 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { using TableTy = llvm::StringMap, llvm::BumpPtrAllocator>; static TableTy *Table = [] { - auto &DriverTable = driver::getDriverOptTable(); - using DriverID = clang::driver::options::ID; + auto &DriverTable = getDriverOptTable(); + using DriverID = clang::options::ID; // Collect sets of aliases, so we can treat -foo and -foo= as synonyms. // Conceptually a double-linked list: PrevAlias[I] -> I -> NextAlias[I]. 
@@ -468,7 +467,7 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ METAVAR, VALUES, SUBCOMMANDIDS_OFFSET) \ {DriverID::OPT_##ID, DriverID::OPT_##ALIAS, ALIASARGS}, -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTION }; for (auto &E : AliasTable) diff --git a/clang-tools-extra/modularize/CMakeLists.txt b/clang-tools-extra/modularize/CMakeLists.txt index eb5383c3ad44e..a775b790a3147 100644 --- a/clang-tools-extra/modularize/CMakeLists.txt +++ b/clang-tools-extra/modularize/CMakeLists.txt @@ -20,6 +20,7 @@ clang_target_link_libraries(modularize clangAST clangBasic clangDriver + clangOptions clangFrontend clangLex clangSerialization diff --git a/clang-tools-extra/modularize/CoverageChecker.cpp b/clang-tools-extra/modularize/CoverageChecker.cpp index 1345a6ef8f489..d80d78c64c6e2 100644 --- a/clang-tools-extra/modularize/CoverageChecker.cpp +++ b/clang-tools-extra/modularize/CoverageChecker.cpp @@ -50,18 +50,18 @@ // //===----------------------------------------------------------------------===// +#include "CoverageChecker.h" #include "ModularizeUtilities.h" #include "clang/AST/ASTConsumer.h" -#include "CoverageChecker.h" #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceManager.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Options/Options.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "llvm/Option/Option.h" @@ -73,7 +73,7 @@ using namespace Modularize; using namespace clang; using namespace clang::driver; -using namespace clang::driver::options; +using namespace clang::options; using namespace clang::tooling; namespace cl = llvm::cl; namespace sys = 
llvm::sys; diff --git a/clang-tools-extra/modularize/Modularize.cpp b/clang-tools-extra/modularize/Modularize.cpp index 376ad0c7875bf..33966b44f719a 100644 --- a/clang-tools-extra/modularize/Modularize.cpp +++ b/clang-tools-extra/modularize/Modularize.cpp @@ -231,11 +231,11 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceManager.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Options/Options.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "llvm/Option/Arg.h" @@ -254,7 +254,7 @@ using namespace clang; using namespace clang::driver; -using namespace clang::driver::options; +using namespace clang::options; using namespace clang::tooling; using namespace llvm; using namespace llvm::opt; diff --git a/clang-tools-extra/modularize/ModularizeUtilities.cpp b/clang-tools-extra/modularize/ModularizeUtilities.cpp index 4dd84feac5df4..6978a6b2fe1b7 100644 --- a/clang-tools-extra/modularize/ModularizeUtilities.cpp +++ b/clang-tools-extra/modularize/ModularizeUtilities.cpp @@ -12,17 +12,17 @@ // //===----------------------------------------------------------------------===// +#include "ModularizeUtilities.h" +#include "CoverageChecker.h" #include "clang/Basic/SourceManager.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" -#include "CoverageChecker.h" +#include "clang/Options/Options.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include "ModularizeUtilities.h" using namespace clang; using namespace llvm; diff --git a/clang-tools-extra/pp-trace/CMakeLists.txt 
b/clang-tools-extra/pp-trace/CMakeLists.txt index 1323adbc35269..da36582ee0234 100644 --- a/clang-tools-extra/pp-trace/CMakeLists.txt +++ b/clang-tools-extra/pp-trace/CMakeLists.txt @@ -14,6 +14,7 @@ clang_target_link_libraries(pp-trace PRIVATE clangAST clangBasic + clangOptions clangFrontend clangLex clangSerialization diff --git a/clang-tools-extra/pp-trace/PPTrace.cpp b/clang-tools-extra/pp-trace/PPTrace.cpp index 0b078c49a55b7..ba5a06a26830d 100644 --- a/clang-tools-extra/pp-trace/PPTrace.cpp +++ b/clang-tools-extra/pp-trace/PPTrace.cpp @@ -28,11 +28,11 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Options/Options.h" #include "clang/Tooling/Execution.h" #include "clang/Tooling/Tooling.h" #include "llvm/Option/Arg.h" diff --git a/clang/docs/CMakeLists.txt b/clang/docs/CMakeLists.txt index 1f06c040c96cb..9469a832adb62 100644 --- a/clang/docs/CMakeLists.txt +++ b/clang/docs/CMakeLists.txt @@ -132,7 +132,7 @@ if (LLVM_ENABLE_SPHINX) # Generated files gen_rst_file_from_td(AttributeReference.rst -gen-attr-docs ../include/clang/Basic/Attr.td "${docs_targets}") gen_rst_file_from_td(DiagnosticsReference.rst -gen-diag-docs ../include/clang/Basic/Diagnostic.td "${docs_targets}") - gen_rst_file_from_td(ClangCommandLineReference.rst -gen-opt-docs ../include/clang/Driver/ClangOptionDocs.td "${docs_targets}") + gen_rst_file_from_td(ClangCommandLineReference.rst -gen-opt-docs ../include/clang/Options/ClangOptionDocs.td "${docs_targets}") # Another generated file from a different source set(docs_tools_dir ${CMAKE_CURRENT_SOURCE_DIR}/tools) diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index eff46ab46e1ca..a849d05eb7ae9 100644 --- a/clang/docs/InternalsManual.rst +++ 
b/clang/docs/InternalsManual.rst @@ -667,7 +667,7 @@ Command Line Interface ---------------------- The command line interface of the Clang ``-cc1`` frontend is defined alongside -the driver options in ``clang/Driver/Options.td``. The information making up an +the driver options in ``clang/Options/Options.td``. The information making up an option definition includes its prefix and name (for example ``-std=``), form and position of the option value, help text, aliases and more. Each option may belong to a certain group and can be marked with zero or more flags. Options @@ -712,7 +712,7 @@ variable for the option value: } Next, declare the command line interface of the option in the tablegen file -``clang/include/clang/Driver/Options.td``. This is done by instantiating the +``clang/include/clang/Options/Options.td``. This is done by instantiating the ``Option`` class (defined in ``llvm/include/llvm/Option/OptParser.td``). The instance is typically created through one of the helper classes that encode the acceptable ways to specify the option value on the command line: @@ -906,7 +906,7 @@ command line: SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, \ MERGER, TABLE_INDEX) - #include "clang/Driver/Options.inc" + #include "clang/Options/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING // ... @@ -925,7 +925,7 @@ command line: GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) - #include "clang/Driver/Options.inc" + #include "clang/Options/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING // ... 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c095a59bb978d..333aae74db4f8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -79,6 +79,9 @@ Potentially Breaking Changes void foo(void) { return ({ 1;; }); } +- Downstream projects that previously linked only against ``clangDriver`` may + now also need to link against the new ``clangOptions`` library, since + options-related code has been moved out of the Driver into a separate library. C/C++ Language Potentially Breaking Changes ------------------------------------------- diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cd5f2c3012712..cb08e2107f072 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -311,7 +311,7 @@ let Features = "sse3", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def lddqu : X86Builtin<"_Vector<16, char>(char const *)">; } -let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">; } @@ -605,8 +605,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; - def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, " - "_Vector<32, char>, _Constant int)">; + def psadbw256 : X86Builtin< "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; @@ -630,6 +629,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned 
char>, _Vector<32, unsigned char>)">; def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; + def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; @@ -3263,7 +3263,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in { def kmovq : X86Builtin<"unsigned long long int(unsigned long long int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">; } diff --git a/clang/include/clang/CMakeLists.txt b/clang/include/clang/CMakeLists.txt index 47ac70cd21690..77a44e4c48de5 100644 --- a/clang/include/clang/CMakeLists.txt +++ b/clang/include/clang/CMakeLists.txt @@ -3,7 +3,7 @@ add_subdirectory(Basic) if(CLANG_ENABLE_CIR) add_subdirectory(CIR) endif() -add_subdirectory(Driver) +add_subdirectory(Options) add_subdirectory(Parse) add_subdirectory(Sema) add_subdirectory(Serialization) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 419089731569b..297afe26812a0 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -15,11 +15,11 @@ #include "clang/Driver/Action.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Phases.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" +#include "clang/Options/Options.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringMap.h" diff 
--git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h index 49fd920d1ec43..ed2703c76f18d 100644 --- a/clang/include/clang/Frontend/Utils.h +++ b/clang/include/clang/Frontend/Utils.h @@ -15,8 +15,8 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" -#include "clang/Driver/OptionUtils.h" #include "clang/Frontend/DependencyOutputOptions.h" +#include "clang/Options/OptionUtils.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/StringMap.h" diff --git a/clang/include/clang/Driver/CMakeLists.txt b/clang/include/clang/Options/CMakeLists.txt similarity index 100% rename from clang/include/clang/Driver/CMakeLists.txt rename to clang/include/clang/Options/CMakeLists.txt diff --git a/clang/include/clang/Driver/ClangOptionDocs.td b/clang/include/clang/Options/ClangOptionDocs.td similarity index 100% rename from clang/include/clang/Driver/ClangOptionDocs.td rename to clang/include/clang/Options/ClangOptionDocs.td diff --git a/clang/include/clang/Driver/OptionUtils.h b/clang/include/clang/Options/OptionUtils.h similarity index 94% rename from clang/include/clang/Driver/OptionUtils.h rename to clang/include/clang/Options/OptionUtils.h index 922f536bf33ea..83c48bd7d6843 100644 --- a/clang/include/clang/Driver/OptionUtils.h +++ b/clang/include/clang/Options/OptionUtils.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_DRIVER_OPTIONUTILS_H -#define LLVM_CLANG_DRIVER_OPTIONUTILS_H +#ifndef LLVM_CLANG_OPTIONS_OPTIONUTILS_H +#define LLVM_CLANG_OPTIONS_OPTIONUTILS_H #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" @@ -55,4 +55,4 @@ inline uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args, } // namespace clang -#endif // LLVM_CLANG_DRIVER_OPTIONUTILS_H +#endif // LLVM_CLANG_OPTIONS_OPTIONUTILS_H diff --git a/clang/include/clang/Driver/Options.h b/clang/include/clang/Options/Options.h similarity 
index 83% rename from clang/include/clang/Driver/Options.h rename to clang/include/clang/Options/Options.h index 0797410e9940e..ac98699001965 100644 --- a/clang/include/clang/Driver/Options.h +++ b/clang/include/clang/Options/Options.h @@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_DRIVER_OPTIONS_H -#define LLVM_CLANG_DRIVER_OPTIONS_H +#ifndef LLVM_CLANG_OPTIONS_OPTIONS_H +#define LLVM_CLANG_OPTIONS_OPTIONS_H #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" namespace clang { -namespace driver { namespace options { /// Flags specifically for clang options. Must not overlap with @@ -42,16 +41,15 @@ enum ClangVisibility { }; enum ID { - OPT_INVALID = 0, // This is not an option ID. + OPT_INVALID = 0, // This is not an option ID. #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), -#include "clang/Driver/Options.inc" - LastOption +#include "clang/Options/Options.inc" + LastOption #undef OPTION - }; -} +}; +} // namespace options const llvm::opt::OptTable &getDriverOptTable(); -} -} +} // namespace clang -#endif +#endif // LLVM_CLANG_OPTIONS_OPTIONS_H diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Options/Options.td similarity index 100% rename from clang/include/clang/Driver/Options.td rename to clang/include/clang/Options/Options.td diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap index c5535262ae38c..a11c8683c601e 100644 --- a/clang/include/module.modulemap +++ b/clang/include/module.modulemap @@ -146,6 +146,7 @@ module Clang_Lex { module * { export * } } +module Clang_Options { requires cplusplus umbrella "clang/Options" module * { export * } } module Clang_Parse { requires cplusplus umbrella "clang/Parse" module * { export * } } module Clang_Rewrite { requires cplusplus umbrella "clang/Rewrite/Core" module * { export * } } module Clang_RewriteFrontend { requires cplusplus umbrella "clang/Rewrite/Frontend" module * { export 
* } } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ef130c0a55df..6c7b2f502cc51 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2841,76 +2841,6 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC, - const CallExpr *Call) { - assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); - const Pointer &Control = S.Stk.pop(); - const Pointer &Src = S.Stk.pop(); - const Pointer &Dst = S.Stk.peek(); - - unsigned NumElems = Dst.getNumElems(); - assert(NumElems == Control.getNumElems()); - assert(NumElems == Dst.getNumElems()); - - for (unsigned Idx = 0; Idx != NumElems; ++Idx) { - uint8_t Ctlb = static_cast(Control.elem(Idx)); - - if (Ctlb & 0x80) { - Dst.elem(Idx) = 0; - } else { - unsigned LaneBase = (Idx / 16) * 16; - unsigned SrcOffset = Ctlb & 0x0F; - unsigned SrcIdx = LaneBase + SrcOffset; - - Dst.elem(Idx) = Src.elem(SrcIdx); - } - } - Dst.initializeAllElements(); - return true; -} - -static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, - const CallExpr *Call, bool IsShufHW) { - assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); - APSInt ControlImm = popToAPSInt(S, Call->getArg(1)); - const Pointer &Src = S.Stk.pop(); - const Pointer &Dst = S.Stk.peek(); - - unsigned NumElems = Dst.getNumElems(); - PrimType ElemT = Dst.getFieldDesc()->getPrimType(); - - unsigned ElemBits = static_cast(primSize(ElemT) * 8); - if (ElemBits != 16 && ElemBits != 32) - return false; - - unsigned LaneElts = 128u / ElemBits; - assert(LaneElts && (NumElems % LaneElts == 0)); - - uint8_t Ctl = static_cast(ControlImm.getZExtValue()); - - for (unsigned Idx = 0; Idx != NumElems; Idx++) { - unsigned LaneBase = (Idx / LaneElts) * LaneElts; - unsigned LaneIdx = Idx % LaneElts; - unsigned SrcIdx = Idx; - unsigned Sel = (Ctl >> (2 * 
(LaneIdx & 0x3))) & 0x3; - if (ElemBits == 32) { - SrcIdx = LaneBase + Sel; - } else { - constexpr unsigned HalfSize = 4; - bool InHigh = LaneIdx >= HalfSize; - if (!IsShufHW && !InHigh) { - SrcIdx = LaneBase + Sel; - } else if (IsShufHW && InHigh) { - SrcIdx = LaneBase + HalfSize + Sel; - } - } - - INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem(Idx) = Src.elem(SrcIdx); }); - } - Dst.initializeAllElements(); - return true; -} - static bool interp__builtin_ia32_test_op( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref Fn) { @@ -3377,61 +3307,46 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_x86_byteshift( - InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID, - llvm::function_ref - Fn) { - assert(Call->getNumArgs() == 2); - - APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); - uint64_t Shift = ImmAPS.getZExtValue() & 0xff; - - const Pointer &Src = S.Stk.pop(); - if (!Src.getFieldDesc()->isPrimitiveArray()) - return false; - - unsigned NumElems = Src.getNumElems(); - const Pointer &Dst = S.Stk.peek(); - PrimType ElemT = Src.getFieldDesc()->getPrimType(); - - for (unsigned Lane = 0; Lane != NumElems; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - unsigned Base = Lane + I; - APSInt Result = APSInt(Fn(Src, Lane, I, Shift)); - INT_TYPE_SWITCH_NO_BOOL(ElemT, - { Dst.elem(Base) = static_cast(Result); }); - } - } - - Dst.initializeAllElements(); - - return true; -} - static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref(unsigned, unsigned)> GetSourceIndex) { - assert(Call->getNumArgs() == 3); + assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3); unsigned ShuffleMask = 0; Pointer A, MaskVector, B; - - QualType Arg2Type = Call->getArg(2)->getType(); bool IsVectorMask = false; - if (Arg2Type->isVectorType()) { - IsVectorMask = true; - B = S.Stk.pop(); - MaskVector = S.Stk.pop(); - A = 
S.Stk.pop(); - } else if (Arg2Type->isIntegerType()) { - ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); - B = S.Stk.pop(); - A = S.Stk.pop(); + bool IsSingleOperand = (Call->getNumArgs() == 2); + + if (IsSingleOperand) { + QualType MaskType = Call->getArg(1)->getType(); + if (MaskType->isVectorType()) { + IsVectorMask = true; + MaskVector = S.Stk.pop(); + A = S.Stk.pop(); + B = A; + } else if (MaskType->isIntegerType()) { + ShuffleMask = popToAPSInt(S, Call->getArg(1)).getZExtValue(); + A = S.Stk.pop(); + B = A; + } else { + return false; + } } else { - return false; + QualType Arg2Type = Call->getArg(2)->getType(); + if (Arg2Type->isVectorType()) { + IsVectorMask = true; + B = S.Stk.pop(); + MaskVector = S.Stk.pop(); + A = S.Stk.pop(); + } else if (Arg2Type->isIntegerType()) { + ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); + B = S.Stk.pop(); + A = S.Stk.pop(); + } else { + return false; + } } QualType Arg0Type = Call->getArg(0)->getType(); @@ -3455,6 +3370,7 @@ static bool interp__builtin_ia32_shuffle_generic( ShuffleMask = static_cast(MaskVector.elem(DstIdx)); }); } + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); if (SrcIdx < 0) { @@ -4555,22 +4471,58 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: - return interp__builtin_ia32_pshufb(S, OpPC, Call); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + uint8_t Ctlb = static_cast(ShuffleMask); + if (Ctlb & 0x80) + return std::make_pair(0, -1); + + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned SrcOffset = Ctlb & 0x0F; + unsigned SrcIdx = LaneBase + SrcOffset; + return std::make_pair(0, static_cast(SrcIdx)); + }); case X86::BI__builtin_ia32_pshuflw: case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: - return interp__builtin_ia32_pshuf(S, 
OpPC, Call, false); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 8) * 8; + unsigned LaneIdx = DstIdx % 8; + if (LaneIdx < 4) { + unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast(LaneBase + Sel)); + } + + return std::make_pair(0, static_cast(DstIdx)); + }); case X86::BI__builtin_ia32_pshufhw: case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, true); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 8) * 8; + unsigned LaneIdx = DstIdx % 8; + if (LaneIdx >= 4) { + unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3; + return std::make_pair(0, static_cast(LaneBase + 4 + Sel)); + } + + return std::make_pair(0, static_cast(DstIdx)); + }); case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: - return interp__builtin_ia32_pshuf(S, OpPC, Call, false); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned LaneBase = (DstIdx / 4) * 4; + unsigned LaneIdx = DstIdx % 4; + unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast(LaneBase + Sel)); + }); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: @@ -4728,13 +4680,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, // The lane width is hardcoded to 16 to match the SIMD register size, // but the algorithm processes one byte per iteration, // so APInt(8, ...) is correct and intentional. 
- return interp__builtin_x86_byteshift( - S, OpPC, Call, BuiltinID, - [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { - if (I < Shift) { - return APInt(8, 0); - } - return APInt(8, Src.elem(Lane + I - Shift)); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [](unsigned DstIdx, unsigned Shift) -> std::pair { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx < Shift) + return std::make_pair(0, -1); + + return std::make_pair(0, + static_cast(LaneBase + LaneIdx - Shift)); }); case X86::BI__builtin_ia32_psrldqi128_byteshift: @@ -4744,14 +4699,40 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, // The lane width is hardcoded to 16 to match the SIMD register size, // but the algorithm processes one byte per iteration, // so APInt(8, ...) is correct and intentional. - return interp__builtin_x86_byteshift( - S, OpPC, Call, BuiltinID, - [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { - if (I + Shift < 16) { - return APInt(8, Src.elem(Lane + I + Shift)); + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, + [](unsigned DstIdx, unsigned Shift) -> std::pair { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx + Shift < 16) + return std::make_pair(0, + static_cast(LaneBase + LaneIdx + Shift)); + + return std::make_pair(0, -1); + }); + + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Shift) { + // Default to -1 → zero-fill this destination element + unsigned VecIdx = 1; + int ElemIdx = -1; + + int Lane = DstIdx / 16; + int Offset = DstIdx % 16; + + // Elements come from VecB first, then VecA after the shift boundary + unsigned ShiftedIdx = Offset + (Shift & 0xFF); + if (ShiftedIdx < 16) { // from VecB + ElemIdx = ShiftedIdx + (Lane * 16); + } 
else if (ShiftedIdx < 32) { // from VecA + VecIdx = 0; + ElemIdx = (ShiftedIdx - 16) + (Lane * 16); } - return APInt(8, 0); + return std::pair{VecIdx, ElemIdx}; }); default: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 972d9fe3b5e4f..1bfea24b228e8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12090,24 +12090,46 @@ static bool evalShuffleGeneric( unsigned ShuffleMask = 0; APValue A, MaskVector, B; bool IsVectorMask = false; - - QualType Arg2Type = Call->getArg(2)->getType(); - if (Arg2Type->isVectorType()) { - IsVectorMask = true; - if (!EvaluateAsRValue(Info, Call->getArg(0), A) || - !EvaluateAsRValue(Info, Call->getArg(1), MaskVector) || - !EvaluateAsRValue(Info, Call->getArg(2), B)) - return false; - } else if (Arg2Type->isIntegerType()) { - APSInt MaskImm; - if (!EvaluateInteger(Call->getArg(2), MaskImm, Info)) - return false; - ShuffleMask = static_cast(MaskImm.getZExtValue()); - if (!EvaluateAsRValue(Info, Call->getArg(0), A) || - !EvaluateAsRValue(Info, Call->getArg(1), B)) + bool IsSingleOperand = (Call->getNumArgs() == 2); + + if (IsSingleOperand) { + QualType MaskType = Call->getArg(1)->getType(); + if (MaskType->isVectorType()) { + IsVectorMask = true; + if (!EvaluateAsRValue(Info, Call->getArg(0), A) || + !EvaluateAsRValue(Info, Call->getArg(1), MaskVector)) + return false; + B = A; + } else if (MaskType->isIntegerType()) { + APSInt MaskImm; + if (!EvaluateInteger(Call->getArg(1), MaskImm, Info)) + return false; + ShuffleMask = static_cast(MaskImm.getZExtValue()); + if (!EvaluateAsRValue(Info, Call->getArg(0), A)) + return false; + B = A; + } else { return false; + } } else { - return false; + QualType Arg2Type = Call->getArg(2)->getType(); + if (Arg2Type->isVectorType()) { + IsVectorMask = true; + if (!EvaluateAsRValue(Info, Call->getArg(0), A) || + !EvaluateAsRValue(Info, Call->getArg(1), MaskVector) || + !EvaluateAsRValue(Info, Call->getArg(2), B)) + return false; + } else if 
(Arg2Type->isIntegerType()) { + APSInt MaskImm; + if (!EvaluateInteger(Call->getArg(2), MaskImm, Info)) + return false; + ShuffleMask = static_cast(MaskImm.getZExtValue()); + if (!EvaluateAsRValue(Info, Call->getArg(0), A) || + !EvaluateAsRValue(Info, Call->getArg(1), B)) + return false; + } else { + return false; + } } unsigned NumElts = VT->getNumElements(); @@ -12124,8 +12146,16 @@ static bool evalShuffleGeneric( if (SrcIdx < 0) { // Zero out this element QualType ElemTy = VT->getElementType(); - ResultElements.push_back( - APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + if (ElemTy->isRealFloatingType()) { + ResultElements.push_back( + APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + } else if (ElemTy->isIntegerType()) { + APValue Zero(Info.Ctx.MakeIntValue(0, ElemTy)); + ResultElements.push_back(APValue(Zero)); + } else { + // Other types of fallback logic + ResultElements.push_back(APValue()); + } } else { const APValue &Src = (SrcVecIdx == 0) ? 
A : B; ResultElements.push_back(Src.getVectorElt(SrcIdx)); @@ -12136,98 +12166,6 @@ static bool evalShuffleGeneric( return true; } -static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call, - APValue &Out) { - APValue SrcVec, ControlVec; - if (!EvaluateAsRValue(Info, Call->getArg(0), SrcVec)) - return false; - if (!EvaluateAsRValue(Info, Call->getArg(1), ControlVec)) - return false; - - const auto *VT = Call->getType()->getAs(); - if (!VT) - return false; - - QualType ElemT = VT->getElementType(); - unsigned NumElts = VT->getNumElements(); - - SmallVector ResultElements; - ResultElements.reserve(NumElts); - - for (unsigned Idx = 0; Idx != NumElts; ++Idx) { - APValue CtlVal = ControlVec.getVectorElt(Idx); - APSInt CtlByte = CtlVal.getInt(); - uint8_t Ctl = static_cast(CtlByte.getZExtValue()); - - if (Ctl & 0x80) { - APValue Zero(Info.Ctx.MakeIntValue(0, ElemT)); - ResultElements.push_back(Zero); - } else { - unsigned LaneBase = (Idx / 16) * 16; - unsigned SrcOffset = Ctl & 0x0F; - unsigned SrcIdx = LaneBase + SrcOffset; - - ResultElements.push_back(SrcVec.getVectorElt(SrcIdx)); - } - } - Out = APValue(ResultElements.data(), ResultElements.size()); - return true; -} - -static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call, - bool IsShufHW, APValue &Out) { - APValue Vec; - APSInt Imm; - if (!EvaluateAsRValue(Info, Call->getArg(0), Vec)) - return false; - if (!EvaluateInteger(Call->getArg(1), Imm, Info)) - return false; - - const auto *VT = Call->getType()->getAs(); - if (!VT) - return false; - - QualType ElemT = VT->getElementType(); - unsigned ElemBits = Info.Ctx.getTypeSize(ElemT); - unsigned NumElts = VT->getNumElements(); - - unsigned LaneBits = 128u; - unsigned LaneElts = LaneBits / ElemBits; - if (!LaneElts || (NumElts % LaneElts) != 0) - return false; - - uint8_t Ctl = static_cast(Imm.getZExtValue()); - - SmallVector ResultElements; - ResultElements.reserve(NumElts); - - for (unsigned Idx = 0; Idx != NumElts; Idx++) { - unsigned LaneBase = 
(Idx / LaneElts) * LaneElts; - unsigned LaneIdx = Idx % LaneElts; - unsigned SrcIdx = Idx; - unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3; - - if (ElemBits == 32) { - SrcIdx = LaneBase + Sel; - } else { - constexpr unsigned HalfSize = 4; - bool InHigh = LaneIdx >= HalfSize; - if (!IsShufHW && !InHigh) { - SrcIdx = LaneBase + Sel; - } else if (IsShufHW && InHigh) { - unsigned Rel = LaneIdx - HalfSize; - Sel = (Ctl >> (2 * Rel)) & 0x3; - SrcIdx = LaneBase + HalfSize + Sel; - } - } - - ResultElements.push_back(Vec.getVectorElt(SrcIdx)); - } - - Out = APValue(ResultElements.data(), ResultElements.size()); - return true; -} - bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!IsConstantEvaluatedBuiltinCall(E)) return ExprEvaluatorBaseTy::VisitCallExpr(E); @@ -12993,7 +12931,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: { APValue R; - if (!evalPshufbBuiltin(Info, E, R)) + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, + unsigned ShuffleMask) -> std::pair { + uint8_t Ctlb = static_cast(ShuffleMask); + if (Ctlb & 0x80) + return std::make_pair(0, -1); + + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned SrcOffset = Ctlb & 0x0F; + unsigned SrcIdx = LaneBase + SrcOffset; + return std::make_pair(0, static_cast(SrcIdx)); + })) return false; return Success(R, E); } @@ -13002,7 +12952,21 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: { APValue R; - if (!evalPshufBuiltin(Info, E, false, R)) + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair { + constexpr unsigned LaneBits = 128u; + constexpr unsigned ElemBits = 16u; + constexpr unsigned LaneElts = LaneBits / ElemBits; + constexpr unsigned HalfSize = 4; + unsigned LaneBase = (DstIdx / LaneElts) * LaneElts; + unsigned LaneIdx = DstIdx % LaneElts; + if (LaneIdx < HalfSize) { + 
unsigned Sel = (Mask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast(LaneBase + Sel)); + } + return std::make_pair(0, static_cast(DstIdx)); + })) return false; return Success(R, E); } @@ -13011,7 +12975,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: { APValue R; - if (!evalPshufBuiltin(Info, E, true, R)) + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair { + constexpr unsigned LaneBits = 128u; + constexpr unsigned ElemBits = 16u; + constexpr unsigned LaneElts = LaneBits / ElemBits; + constexpr unsigned HalfSize = 4; + unsigned LaneBase = (DstIdx / LaneElts) * LaneElts; + unsigned LaneIdx = DstIdx % LaneElts; + if (LaneIdx >= HalfSize) { + unsigned Rel = LaneIdx - HalfSize; + unsigned Sel = (Mask >> (2 * Rel)) & 0x3; + return std::make_pair( + 0, static_cast(LaneBase + HalfSize + Sel)); + } + return std::make_pair(0, static_cast(DstIdx)); + })) return false; return Success(R, E); } @@ -13020,7 +13000,17 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: { APValue R; - if (!evalPshufBuiltin(Info, E, false, R)) + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair { + constexpr unsigned LaneBits = 128u; + constexpr unsigned ElemBits = 32u; + constexpr unsigned LaneElts = LaneBits / ElemBits; + unsigned LaneBase = (DstIdx / LaneElts) * LaneElts; + unsigned LaneIdx = DstIdx % LaneElts; + unsigned Sel = (Mask >> (2 * LaneIdx)) & 0x3; + return std::make_pair(0, static_cast(LaneBase + Sel)); + })) return false; return Success(R, E); } @@ -13500,61 +13490,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: { - assert(E->getNumArgs() == 2); 
- - APValue Src; - APSInt Imm; - if (!EvaluateAsRValue(Info, E->getArg(0), Src) || - !EvaluateInteger(E->getArg(1), Imm, Info)) + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Shift) -> std::pair { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx < Shift) + return std::make_pair(0, -1); + + return std::make_pair( + 0, static_cast(LaneBase + LaneIdx - Shift)); + })) return false; - - unsigned VecLen = Src.getVectorLength(); - unsigned Shift = Imm.getZExtValue() & 0xff; - - SmallVector ResultElements; - for (unsigned Lane = 0; Lane != VecLen; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - if (I < Shift) { - APSInt Zero(8, /*isUnsigned=*/true); - Zero = 0; - ResultElements.push_back(APValue(Zero)); - } else { - ResultElements.push_back(Src.getVectorElt(Lane + I - Shift)); - } - } - } - - return Success(APValue(ResultElements.data(), ResultElements.size()), E); + return Success(R, E); } case X86::BI__builtin_ia32_psrldqi128_byteshift: case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: { - assert(E->getNumArgs() == 2); - - APValue Src; - APSInt Imm; - if (!EvaluateAsRValue(Info, E->getArg(0), Src) || - !EvaluateInteger(E->getArg(1), Imm, Info)) + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Shift) -> std::pair { + unsigned LaneBase = (DstIdx / 16) * 16; + unsigned LaneIdx = DstIdx % 16; + if (LaneIdx + Shift < 16) + return std::make_pair( + 0, static_cast(LaneBase + LaneIdx + Shift)); + + return std::make_pair(0, -1); + })) return false; + return Success(R, E); + } - unsigned VecLen = Src.getVectorLength(); - unsigned Shift = Imm.getZExtValue() & 0xff; - - SmallVector ResultElements; - for (unsigned Lane = 0; Lane != VecLen; Lane += 16) { - for (unsigned I = 0; I != 16; ++I) { - if (I + Shift < 16) { - ResultElements.push_back(Src.getVectorElt(Lane + I + Shift)); - } else { - APSInt Zero(8, 
/*isUnsigned=*/true); - Zero = 0; - ResultElements.push_back(APValue(Zero)); - } - } - } + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Shift) { + // Default to -1 → zero-fill this destination element + unsigned VecIdx = 1; + int ElemIdx = -1; + + int Lane = DstIdx / 16; + int Offset = DstIdx % 16; + + // Elements come from VecB first, then VecA after the shift boundary + unsigned ShiftedIdx = Offset + (Shift & 0xFF); + if (ShiftedIdx < 16) { // from VecB + ElemIdx = ShiftedIdx + (Lane * 16); + } else if (ShiftedIdx < 32) { // from VecA + VecIdx = 0; + ElemIdx = (ShiftedIdx - 16) + (Lane * 16); + } - return Success(APValue(ResultElements.data(), ResultElements.size()), E); + return std::pair{VecIdx, ElemIdx}; + })) + return false; + return Success(R, E); } case X86::BI__builtin_ia32_vpermi2varq128: case X86::BI__builtin_ia32_vpermi2varpd128: { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 5eba5ba6c3df1..c1a36134d8942 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -438,6 +438,10 @@ class ScalarExprEmitter : public StmtVisitor { return cgf.emitVAArg(ve); } + mlir::Value VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *e) { + return Visit(e->getSemanticForm()); + } + mlir::Value VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *e); mlir::Value VisitAbstractConditionalOperator(const AbstractConditionalOperator *e); diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp index fbecab9774f5b..2ef09b74dc968 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp @@ -26,6 +26,11 @@ using namespace mlir; using namespace cir; +namespace mlir { 
+#define GEN_PASS_DEF_CIRCANONICALIZE +#include "clang/CIR/Dialect/Passes.h.inc" +} // namespace mlir + namespace { /// Removes branches between two blocks if it is the only branch. @@ -101,7 +106,8 @@ struct RemoveEmptySwitch : public OpRewritePattern { // CIRCanonicalizePass //===----------------------------------------------------------------------===// -struct CIRCanonicalizePass : public CIRCanonicalizeBase { +struct CIRCanonicalizePass + : public impl::CIRCanonicalizeBase { using CIRCanonicalizeBase::CIRCanonicalizeBase; // The same operation rewriting done here could have been performed diff --git a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp index 3c6f76892d5cb..dcef9ddee1bb4 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp @@ -21,6 +21,11 @@ using namespace mlir; using namespace cir; +namespace mlir { +#define GEN_PASS_DEF_CIRSIMPLIFY +#include "clang/CIR/Dialect/Passes.h.inc" +} // namespace mlir + //===----------------------------------------------------------------------===// // Rewrite patterns //===----------------------------------------------------------------------===// @@ -283,7 +288,7 @@ struct SimplifyVecSplat : public OpRewritePattern { // CIRSimplifyPass //===----------------------------------------------------------------------===// -struct CIRSimplifyPass : public CIRSimplifyBase { +struct CIRSimplifyPass : public impl::CIRSimplifyBase { using CIRSimplifyBase::CIRSimplifyBase; void runOnOperation() override; diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp index ca7554e4e3754..69a5334ca2423 100644 --- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -26,6 +26,11 @@ using namespace mlir; using namespace cir; +namespace mlir { +#define GEN_PASS_DEF_CIRFLATTENCFG +#include "clang/CIR/Dialect/Passes.h.inc" 
+} // namespace mlir + namespace { /// Lowers operations with the terminator trait that have a single successor. @@ -50,7 +55,7 @@ void walkRegionSkipping( }); } -struct CIRFlattenCFGPass : public CIRFlattenCFGBase { +struct CIRFlattenCFGPass : public impl::CIRFlattenCFGBase { CIRFlattenCFGPass() = default; void runOnOperation() override; diff --git a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp index c0db98440a902..00972b6976295 100644 --- a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp +++ b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp @@ -14,9 +14,14 @@ using namespace mlir; using namespace cir; +namespace mlir { +#define GEN_PASS_DEF_GOTOSOLVER +#include "clang/CIR/Dialect/Passes.h.inc" +} // namespace mlir + namespace { -struct GotoSolverPass : public GotoSolverBase { +struct GotoSolverPass : public impl::GotoSolverBase { GotoSolverPass() = default; void runOnOperation() override; }; diff --git a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp index 72bbf08c79b16..74b22faadc8ae 100644 --- a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp +++ b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp @@ -20,9 +20,14 @@ using namespace mlir; using namespace cir; +namespace mlir { +#define GEN_PASS_DEF_HOISTALLOCAS +#include "clang/CIR/Dialect/Passes.h.inc" +} // namespace mlir + namespace { -struct HoistAllocasPass : public HoistAllocasBase { +struct HoistAllocasPass : public impl::HoistAllocasBase { HoistAllocasPass() = default; void runOnOperation() override; diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index cba04649ca05e..29b1211d2c351 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -23,6 +23,11 @@ using namespace mlir; using namespace cir; +namespace mlir { +#define GEN_PASS_DEF_LOWERINGPREPARE 
+#include "clang/CIR/Dialect/Passes.h.inc" +} // namespace mlir + static SmallString<128> getTransformedFileName(mlir::ModuleOp mlirModule) { SmallString<128> fileName; @@ -53,7 +58,8 @@ static cir::FuncOp getCalledFunction(cir::CallOp callOp) { } namespace { -struct LoweringPreparePass : public LoweringPrepareBase { +struct LoweringPreparePass + : public impl::LoweringPrepareBase { LoweringPreparePass() = default; void runOnOperation() override; diff --git a/clang/lib/CIR/Dialect/Transforms/PassDetail.h b/clang/lib/CIR/Dialect/Transforms/PassDetail.h index 600dde56d679f..ef42a85cc2751 100644 --- a/clang/lib/CIR/Dialect/Transforms/PassDetail.h +++ b/clang/lib/CIR/Dialect/Transforms/PassDetail.h @@ -21,7 +21,7 @@ namespace mlir { template void registerDialect(DialectRegistry ®istry); -#define GEN_PASS_CLASSES +#define GEN_PASS_DECL #include "clang/CIR/Dialect/Passes.h.inc" } // namespace mlir diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt index 4f2218b583e41..e90b009da606a 100644 --- a/clang/lib/CMakeLists.txt +++ b/clang/lib/CMakeLists.txt @@ -13,6 +13,7 @@ add_subdirectory(Edit) add_subdirectory(ExtractAPI) add_subdirectory(Rewrite) add_subdirectory(Driver) +add_subdirectory(Options) add_subdirectory(Serialization) add_subdirectory(Frontend) add_subdirectory(FrontendTool) diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index 950709773e637..b68e26f4d3847 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -20,12 +20,10 @@ add_clang_library(clangDriver Compilation.cpp Distro.cpp Driver.cpp - DriverOptions.cpp Job.cpp Multilib.cpp MultilibBuilder.cpp OffloadBundler.cpp - OptionUtils.cpp Phases.cpp SanitizerArgs.cpp Tool.cpp @@ -100,6 +98,7 @@ add_clang_library(clangDriver LINK_LIBS clangBasic clangLex + clangOptions ${system_libs} ${LLVM_PTHREAD_LIB} ) diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index 665d81f99ba45..4a75ae2c74c51 100644 --- 
a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -11,9 +11,9 @@ #include "clang/Driver/Action.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Job.h" -#include "clang/Driver/Options.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Util.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/Option.h" diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 89c4b7363f21c..0350a98d2840e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -60,15 +60,15 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Job.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/OptionUtils.h" #include "clang/Driver/Phases.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/Tool.h" -#include "clang/Driver/Types.h" #include "clang/Driver/ToolChain.h" +#include "clang/Driver/Types.h" #include "clang/Driver/Util.h" #include "clang/Lex/DependencyDirectivesScanner.h" +#include "clang/Options/OptionUtils.h" +#include "clang/Options/Options.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 5dd48f53b9069..420c4cddbc8dd 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -8,8 +8,8 @@ #include "clang/Driver/SanitizerArgs.h" #include "clang/Basic/Sanitizers.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/ToolChain.h" +#include "clang/Options/Options.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3452ce5339174..5ee53c01d6e30 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ 
b/clang/lib/Driver/ToolChain.cpp @@ -21,9 +21,9 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Job.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/XRayArgs.h" +#include "clang/Options/Options.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -338,7 +338,7 @@ static void getRISCVMultilibFlags(const Driver &D, const llvm::Triple &Triple, Multilib::flags_list ToolChain::getMultilibFlags(const llvm::opt::ArgList &Args) const { - using namespace clang::driver::options; + using namespace clang::options; std::vector Result; const llvm::Triple Triple(ComputeEffectiveClangTriple(Args)); @@ -1816,7 +1816,7 @@ void ToolChain::TranslateXarchArgs( unsigned Index = BaseArgs.MakeIndex(A->getValue(ValuePos)); unsigned Prev = Index; std::unique_ptr XarchArg(Opts.ParseOneArg( - Args, Index, llvm::opt::Visibility(clang::driver::options::ClangOption))); + Args, Index, llvm::opt::Visibility(options::ClangOption))); // If the argument parsing failed or more than one argument was // consumed, the -Xarch_ argument's parameter tried to consume diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index ffd7b69205440..21e7dcad3cdd0 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -9,8 +9,8 @@ #include "AIX.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Option/ArgList.h" #include "llvm/ProfileData/InstrProf.h" @@ -19,6 +19,7 @@ #include using AIX = clang::driver::toolchains::AIX; +using namespace clang; using namespace clang::driver; using namespace clang::driver::tools; using namespace clang::driver::toolchains; @@ -167,8 +168,7 @@ void 
aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.hasArg(options::OPT_coverage)) CmdArgs.push_back("-bdbg:namedsects:ss"); - if (Arg *A = - Args.getLastArg(clang::driver::options::OPT_mxcoff_build_id_EQ)) { + if (Arg *A = Args.getLastArg(options::OPT_mxcoff_build_id_EQ)) { StringRef BuildId = A->getValue(); if (BuildId[0] != '0' || BuildId[1] != 'x' || BuildId.find_if_not(llvm::isHexDigit, 2) != StringRef::npos) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index edfc62f0ad7d5..3b5411179349b 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,8 +12,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Error.h" @@ -324,27 +324,24 @@ RocmInstallationDetector::RocmInstallationDetector( const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) : D(D) { Verbose = Args.hasArg(options::OPT_v); - RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ); - PrintROCmSearchDirs = - Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs); + RocmPathArg = Args.getLastArgValue(options::OPT_rocm_path_EQ); + PrintROCmSearchDirs = Args.hasArg(options::OPT_print_rocm_search_dirs); RocmDeviceLibPathArg = - Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); - HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); - HIPStdParPathArg = - Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ); + Args.getAllArgValues(options::OPT_rocm_device_lib_path_EQ); + HIPPathArg = Args.getLastArgValue(options::OPT_hip_path_EQ); + HIPStdParPathArg = Args.getLastArgValue(options::OPT_hipstdpar_path_EQ); HasHIPStdParLibrary = 
!HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg + "/hipstdpar_lib.hpp"); HIPRocThrustPathArg = - Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ); + Args.getLastArgValue(options::OPT_hipstdpar_thrust_path_EQ); HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && D.getVFS().exists(HIPRocThrustPathArg + "/thrust"); - HIPRocPrimPathArg = - Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ); + HIPRocPrimPathArg = Args.getLastArgValue(options::OPT_hipstdpar_prim_path_EQ); HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && D.getVFS().exists(HIPRocPrimPathArg + "/rocprim"); - if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { + if (auto *A = Args.getLastArg(options::OPT_hip_version_EQ)) { HIPVersionArg = A->getValue(); unsigned Major = ~0U; unsigned Minor = ~0U; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 7185b24aec0f8..c8601d1dedbcb 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -11,9 +11,9 @@ #include "Gnu.h" #include "clang/Basic/TargetID.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" +#include "clang/Options/Options.h" #include "llvm/ADT/SmallString.h" #include "llvm/TargetParser/TargetParser.h" diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index e73b4f02cac39..a96d40166fdb7 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -11,9 +11,9 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/Tool.h" +#include "clang/Options/Options.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Path.h" diff --git 
a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 731076d9754a9..588255dc5a0cd 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -10,7 +10,7 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index e8d5e38a9064f..d6fb2a57539ed 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -9,7 +9,7 @@ #include "AArch64.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/Host.h" @@ -222,7 +222,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, // Default to 'A' profile if the architecture is not specified. 
success = getAArch64ArchFeaturesFromMarch(D, "armv8-a", Args, Extensions); - if (success && (A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ))) + if (success && (A = Args.getLastArg(options::OPT_mtune_EQ))) success = getAArch64MicroArchFeaturesFromMtune(D, A->getValue(), Args, Features); else if (success && (A = Args.getLastArg(options::OPT_mcpu_EQ))) diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 61beb0455147d..55eb2dcf7ddf4 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -8,7 +8,7 @@ #include "ARM.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/TargetParser/ARMTargetParser.h" @@ -74,7 +74,7 @@ bool arm::isARMEABIBareMetal(const llvm::Triple &Triple) { // Get Arch/CPU from args. void arm::getARMArchCPUFromArgs(const ArgList &Args, llvm::StringRef &Arch, llvm::StringRef &CPU, bool FromAs) { - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) + if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) CPU = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) Arch = A->getValue(); diff --git a/clang/lib/Driver/ToolChains/Arch/CSKY.cpp b/clang/lib/Driver/ToolChains/Arch/CSKY.cpp index 2fd2c72147f5b..65f6534e4d038 100644 --- a/clang/lib/Driver/ToolChains/Arch/CSKY.cpp +++ b/clang/lib/Driver/ToolChains/Arch/CSKY.cpp @@ -8,7 +8,7 @@ #include "CSKY.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/TargetParser/CSKYTargetParser.h" @@ -33,7 +33,7 @@ csky::getCSKYArchName(const Driver &D, const ArgList &Args, return std::optional(A->getValue()); } - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) { + if (const 
Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { llvm::CSKY::ArchKind ArchKind = llvm::CSKY::parseCPUArch(A->getValue()); if (ArchKind == llvm::CSKY::ArchKind::INVALID) { D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); @@ -126,7 +126,7 @@ void csky::getCSKYTargetFeatures(const Driver &D, const llvm::Triple &Triple, archName = A->getValue(); } - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { llvm::CSKY::ArchKind Kind = llvm::CSKY::parseCPUArch(A->getValue()); if (Kind == llvm::CSKY::ArchKind::INVALID) { D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 156ea03045569..da084bdabaee3 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -11,7 +11,7 @@ #include "clang/Basic/DiagnosticDriver.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/TargetParser/Host.h" #include "llvm/TargetParser/LoongArchTargetParser.h" @@ -130,8 +130,7 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { // Enable the `lsx` feature on 64-bit LoongArch by default. - if (Triple.isLoongArch64() && - (!Args.hasArgNoClaim(clang::driver::options::OPT_march_EQ))) + if (Triple.isLoongArch64() && (!Args.hasArgNoClaim(options::OPT_march_EQ))) Features.push_back("+lsx"); // -mrelax is default, unless -mno-relax is specified. 
diff --git a/clang/lib/Driver/ToolChains/Arch/M68k.cpp b/clang/lib/Driver/ToolChains/Arch/M68k.cpp index 708ec84a37cfb..a620597f10475 100644 --- a/clang/lib/Driver/ToolChains/Arch/M68k.cpp +++ b/clang/lib/Driver/ToolChains/Arch/M68k.cpp @@ -8,7 +8,7 @@ #include "M68k.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Regex.h" @@ -21,7 +21,7 @@ using namespace llvm::opt; /// getM68kTargetCPU - Get the (LLVM) name of the 68000 cpu we are targeting. std::string m68k::getM68kTargetCPU(const ArgList &Args) { - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) { + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { // The canonical CPU name is captalize. However, we allow // starting with lower case or numbers only StringRef CPUName = A->getValue(); @@ -45,17 +45,17 @@ std::string m68k::getM68kTargetCPU(const ArgList &Args) { .Default(CPUName.str()); } // FIXME: Throw error when multiple sub-architecture flag exist - if (Args.hasArg(clang::driver::options::OPT_m68000)) + if (Args.hasArg(options::OPT_m68000)) return "M68000"; - if (Args.hasArg(clang::driver::options::OPT_m68010)) + if (Args.hasArg(options::OPT_m68010)) return "M68010"; - if (Args.hasArg(clang::driver::options::OPT_m68020)) + if (Args.hasArg(options::OPT_m68020)) return "M68020"; - if (Args.hasArg(clang::driver::options::OPT_m68030)) + if (Args.hasArg(options::OPT_m68030)) return "M68030"; - if (Args.hasArg(clang::driver::options::OPT_m68040)) + if (Args.hasArg(options::OPT_m68040)) return "M68040"; - if (Args.hasArg(clang::driver::options::OPT_m68060)) + if (Args.hasArg(options::OPT_m68060)) return "M68060"; return ""; diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp index 8d7b85dbeed99..103aae7018fbf 100644 --- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp +++ 
b/clang/lib/Driver/ToolChains/Arch/Mips.cpp @@ -9,7 +9,7 @@ #include "Mips.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" @@ -49,8 +49,7 @@ void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, DefMips64CPU = "mips3"; } - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ, - options::OPT_mcpu_EQ)) + if (Arg *A = Args.getLastArg(options::OPT_march_EQ, options::OPT_mcpu_EQ)) CPUName = A->getValue(); if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index 361a68a892a8f..44afdd249fea5 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -9,7 +9,7 @@ #include "PPC.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/TargetParser/Host.h" diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index f2e79e71f93d4..1dcce6d053a39 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -10,7 +10,7 @@ #include "../Clang.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Error.h" #include "llvm/TargetParser/Host.h" diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index 94a94f1e9c487..49256d80cbdf6 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -8,7 +8,7 @@ #include "Sparc.h" #include 
"clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/TargetParser/Host.h" @@ -122,7 +122,7 @@ sparc::FloatABI sparc::getSparcFloatABI(const Driver &D, std::string sparc::getSparcTargetCPU(const Driver &D, const ArgList &Args, const llvm::Triple &Triple) { - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPUName = A->getValue(); if (CPUName == "native") { std::string CPU = std::string(llvm::sys::getHostCPUName()); diff --git a/clang/lib/Driver/ToolChains/Arch/SystemZ.cpp b/clang/lib/Driver/ToolChains/Arch/SystemZ.cpp index 75b6afd925245..1ef6a725483e8 100644 --- a/clang/lib/Driver/ToolChains/Arch/SystemZ.cpp +++ b/clang/lib/Driver/ToolChains/Arch/SystemZ.cpp @@ -8,7 +8,7 @@ #include "SystemZ.h" #include "clang/Config/config.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/TargetParser/Host.h" @@ -25,9 +25,9 @@ systemz::FloatABI systemz::getSystemZFloatABI(const Driver &D, D.Diag(diag::err_drv_unsupported_opt) << Args.getLastArg(options::OPT_mfloat_abi_EQ)->getAsString(Args); - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_msoft_float, - options::OPT_mhard_float)) - if (A->getOption().matches(clang::driver::options::OPT_msoft_float)) + if (Arg *A = + Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float)) + if (A->getOption().matches(options::OPT_msoft_float)) ABI = systemz::FloatABI::Soft; return ABI; @@ -35,7 +35,7 @@ systemz::FloatABI systemz::getSystemZFloatABI(const Driver &D, std::string systemz::getSystemZTargetCPU(const ArgList &Args, const llvm::Triple &T) { - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { llvm::StringRef CPUName = A->getValue(); if 
(CPUName == "native") { diff --git a/clang/lib/Driver/ToolChains/Arch/VE.cpp b/clang/lib/Driver/ToolChains/Arch/VE.cpp index adc0873586588..c8353d7dc5f3a 100644 --- a/clang/lib/Driver/ToolChains/Arch/VE.cpp +++ b/clang/lib/Driver/ToolChains/Arch/VE.cpp @@ -8,7 +8,7 @@ #include "VE.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" using namespace clang::driver; diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 1373905a5120e..092069b6ade56 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -8,7 +8,7 @@ #include "X86.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Option/ArgList.h" @@ -21,7 +21,7 @@ using namespace llvm::opt; std::string x86::getX86TargetCPU(const Driver &D, const ArgList &Args, const llvm::Triple &Triple) { - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { StringRef CPU = A->getValue(); if (CPU != "native") return std::string(CPU); @@ -119,7 +119,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, std::vector &Features) { // Claim and report unsupported -mabi=. Note: we don't support "sysv_abi" or // "ms_abi" as default function attributes. - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mabi_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { StringRef DefaultAbi = (Triple.isOSWindows() || Triple.isUEFI()) ? "ms" : "sysv"; if (A->getValue() != DefaultAbi) @@ -128,7 +128,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, } // If -march=native, autodetect the feature list. 
- if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { if (StringRef(A->getValue()) == "native") { for (auto &F : llvm::sys::getHostCPUFeatures()) Features.push_back( @@ -163,7 +163,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, // flags). This is a bit hacky but keeps existing usages working. We should // consider deprecating this and instead warn if the user requests external // retpoline thunks and *doesn't* request some form of retpolines. - auto SpectreOpt = clang::driver::options::ID::OPT_INVALID; + auto SpectreOpt = options::ID::OPT_INVALID; if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline, options::OPT_mspeculative_load_hardening, options::OPT_mno_speculative_load_hardening)) { @@ -189,7 +189,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, SpectreOpt = options::OPT_mretpoline_external_thunk; } - auto LVIOpt = clang::driver::options::ID::OPT_INVALID; + auto LVIOpt = options::ID::OPT_INVALID; if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening, false)) { Features.push_back("+lvi-load-hardening"); @@ -207,7 +207,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, << D.getOpts().getOptionName(options::OPT_mlvi_hardening) << D.getOpts().getOptionName(options::OPT_m_seses); - if (SpectreOpt != clang::driver::options::ID::OPT_INVALID) + if (SpectreOpt != options::ID::OPT_INVALID) D.Diag(diag::err_drv_argument_not_allowed_with) << D.getOpts().getOptionName(SpectreOpt) << D.getOpts().getOptionName(options::OPT_m_seses); @@ -219,8 +219,8 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, } } - if (SpectreOpt != clang::driver::options::ID::OPT_INVALID && - LVIOpt != clang::driver::options::ID::OPT_INVALID) { + if (SpectreOpt != options::ID::OPT_INVALID && + LVIOpt != options::ID::OPT_INVALID) { 
D.Diag(diag::err_drv_argument_not_allowed_with) << D.getOpts().getOptionName(SpectreOpt) << D.getOpts().getOptionName(LVIOpt); diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 9b7f58c392885..8d598be9ffb0a 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -18,7 +18,7 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/MultilibBuilder.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" @@ -135,7 +135,7 @@ static std::string computeClangRuntimesSysRoot(const Driver &D, bool BareMetal::initGCCInstallation(const llvm::Triple &Triple, const llvm::opt::ArgList &Args) { if (Args.getLastArg(options::OPT_gcc_toolchain) || - Args.getLastArg(clang::driver::options::OPT_gcc_install_dir_EQ)) { + Args.getLastArg(clang::options::OPT_gcc_install_dir_EQ)) { GCCInstallation.init(Triple, Args); return GCCInstallation.isValid(); } diff --git a/clang/lib/Driver/ToolChains/CSKYToolChain.cpp b/clang/lib/Driver/ToolChains/CSKYToolChain.cpp index e4db3307ee3aa..c561d7d38da5b 100644 --- a/clang/lib/Driver/ToolChains/CSKYToolChain.cpp +++ b/clang/lib/Driver/ToolChains/CSKYToolChain.cpp @@ -10,7 +10,7 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c9b7937ce267f..f1389ed42cd7a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -30,10 +30,10 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Distro.h" #include 
"clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/Types.h" #include "clang/Driver/XRayArgs.h" +#include "clang/Options/Options.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" @@ -66,7 +66,7 @@ using namespace clang; using namespace llvm::opt; static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) { - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC, + if (Arg *A = Args.getLastArg(options::OPT_C, options::OPT_CC, options::OPT_fminimize_whitespace, options::OPT_fno_minimize_whitespace, options::OPT_fkeep_system_includes, @@ -1700,7 +1700,7 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, AddAAPCSVolatileBitfieldArgs(Args, CmdArgs); - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { CmdArgs.push_back("-tune-cpu"); if (strcmp(A->getValue(), "native") == 0) CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName())); @@ -2106,7 +2106,7 @@ void Clang::AddSparcTargetArgs(const ArgList &Args, CmdArgs.push_back("hard"); } - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { StringRef Name = A->getValue(); std::string TuneCPU; if (Name == "native") @@ -2212,12 +2212,11 @@ void Clang::AddX86TargetArgs(const ArgList &Args, // Default to "generic" unless -march is present or targetting the PS4/PS5. std::string TuneCPU; - if (!Args.hasArg(clang::driver::options::OPT_march_EQ) && - !getToolChain().getTriple().isPS()) + if (!Args.hasArg(options::OPT_march_EQ) && !getToolChain().getTriple().isPS()) TuneCPU = "generic"; // Override based on -mtune. 
- if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { StringRef Name = A->getValue(); if (Name == "native") { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 4b1520d2365f9..2b279387d2c14 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -31,11 +31,11 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Job.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Util.h" #include "clang/Driver/XRayArgs.h" +#include "clang/Options/Options.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" @@ -88,8 +88,7 @@ static bool addRPathCmdArg(const llvm::opt::ArgList &Args, static bool useFramePointerForTargetByDefault(const llvm::opt::ArgList &Args, const llvm::Triple &Triple) { - if (Args.hasArg(clang::driver::options::OPT_pg) && - !Args.hasArg(clang::driver::options::OPT_mfentry)) + if (Args.hasArg(options::OPT_pg) && !Args.hasArg(options::OPT_mfentry)) return true; if (Triple.isAndroid()) @@ -268,17 +267,16 @@ getFramePointerKind(const llvm::opt::ArgList &Args, // without requiring new frame records to be created. 
bool DefaultFP = useFramePointerForTargetByDefault(Args, Triple); - bool EnableFP = - mustUseNonLeafFramePointerForTarget(Triple) || - Args.hasFlag(clang::driver::options::OPT_fno_omit_frame_pointer, - clang::driver::options::OPT_fomit_frame_pointer, DefaultFP); + bool EnableFP = mustUseNonLeafFramePointerForTarget(Triple) || + Args.hasFlag(options::OPT_fno_omit_frame_pointer, + options::OPT_fomit_frame_pointer, DefaultFP); bool DefaultLeafFP = useLeafFramePointerForTargetByDefault(Triple) || (EnableFP && framePointerImpliesLeafFramePointer(Args, Triple)); - bool EnableLeafFP = Args.hasFlag( - clang::driver::options::OPT_mno_omit_leaf_frame_pointer, - clang::driver::options::OPT_momit_leaf_frame_pointer, DefaultLeafFP); + bool EnableLeafFP = + Args.hasFlag(options::OPT_mno_omit_leaf_frame_pointer, + options::OPT_momit_leaf_frame_pointer, DefaultLeafFP); bool FPRegReserved = EnableFP || mustMaintainValidFrameChain(Args, Triple); @@ -777,7 +775,7 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, case llvm::Triple::ppcle: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) return std::string( llvm::PPC::getNormalizedPPCTargetCPU(T, A->getValue())); return std::string(llvm::PPC::getNormalizedPPCTargetCPU(T)); @@ -1440,7 +1438,7 @@ void tools::addOpenMPRuntimeSpecificRPath(const ToolChain &TC, addRPathCmdArg(Args, CmdArgs, CandidateRPath_lib); std::string rocmPath = - Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str(); + Args.getLastArgValue(clang::options::OPT_rocm_path_EQ).str(); if (rocmPath.size() != 0) { std::string rocmPath_lib = rocmPath + "/lib"; std::string rocmPath_suf = rocmPath + "/" + LibSuffix; @@ -1869,7 +1867,7 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (SanArgs.needsFuzzerInterceptors()) addSanitizerRuntime(TC, Args, CmdArgs, "fuzzer_interceptors", false, 
true); - if (!Args.hasArg(clang::driver::options::OPT_nostdlibxx)) { + if (!Args.hasArg(options::OPT_nostdlibxx)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); if (OnlyLibstdcxxStatic) @@ -3574,7 +3572,7 @@ void tools::handleInterchangeLoopsArgs(const ArgList &Args, // Otherwise, return an empty string and issue a diagnosic message if needed. StringRef tools::parseMPreferVectorWidthOption(clang::DiagnosticsEngine &Diags, const llvm::opt::ArgList &Args) { - Arg *A = Args.getLastArg(clang::driver::options::OPT_mprefer_vector_width_EQ); + Arg *A = Args.getLastArg(options::OPT_mprefer_vector_width_EQ); if (!A) return ""; diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp index 51c892fc91718..6df5315e8fff8 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -10,8 +10,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8fc1bded2acea..de3cf7abbdec5 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -15,7 +15,7 @@ #include "clang/Driver/Distro.h" #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE #include "llvm/Option/ArgList.h" @@ -154,16 +154,16 @@ CudaInstallationDetector::CudaInstallationDetector( std::initializer_list Versions = {"8.0", "7.5", "7.0"}; auto &FS = D.getVFS(); - if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) { + if 
(Args.hasArg(options::OPT_cuda_path_EQ)) { Candidates.emplace_back( - Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str()); + Args.getLastArgValue(options::OPT_cuda_path_EQ).str()); } else if (HostTriple.isOSWindows()) { for (const char *Ver : Versions) Candidates.emplace_back( D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" + Ver); } else { - if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) { + if (!Args.hasArg(options::OPT_cuda_path_ignore_env)) { // Try to find ptxas binary. If the executable is located in a directory // called 'bin/', its parent directory might be a good guess for a valid // CUDA installation. diff --git a/clang/lib/Driver/ToolChains/Cygwin.cpp b/clang/lib/Driver/ToolChains/Cygwin.cpp index d9c16347daa34..55438125ce0f1 100644 --- a/clang/lib/Driver/ToolChains/Cygwin.cpp +++ b/clang/lib/Driver/ToolChains/Cygwin.cpp @@ -10,7 +10,7 @@ #include "clang/Config/config.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" @@ -58,7 +58,7 @@ void Cygwin::AddClangSystemIncludeArgs(const ArgList &DriverArgs, const Driver &D = getDriver(); std::string SysRoot = computeSysRoot(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 2fb7652d64536..fc3cd9030f71d 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -14,8 +14,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include 
"llvm/Option/ArgList.h" #include "llvm/ProfileData/InstrProf.h" @@ -1079,7 +1079,7 @@ StringRef MachO::getMachOArchName(const ArgList &Args) const { case llvm::Triple::thumb: case llvm::Triple::arm: - if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) if (const char *Arch = ArmMachOArchName(A->getValue())) return Arch; @@ -2993,7 +2993,7 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, if (!BoundArch.empty()) { StringRef Name = BoundArch; const Option MCpu = Opts.getOption(options::OPT_mcpu_EQ); - const Option MArch = Opts.getOption(clang::driver::options::OPT_march_EQ); + const Option MArch = Opts.getOption(options::OPT_march_EQ); // This code must be kept in sync with LLVM's getArchTypeForDarwinArch, // which defines the list of which architectures we accept. diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index 524f5f2ff391e..d4a6d6ae3e349 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -10,7 +10,7 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" @@ -219,7 +219,7 @@ void DragonFly::AddClangSystemIncludeArgs( llvm::opt::ArgStringList &CC1Args) const { const Driver &D = getDriver(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index f24dd3982eab7..8d53547f8ab66 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -11,7 +11,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Driver/CommonArgs.h" 
-#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Frontend/Debug/Options.h" #include "llvm/Support/Path.h" #include "llvm/TargetParser/Host.h" @@ -238,7 +238,7 @@ void Flang::addCodegenOptions(const ArgList &Args, options::OPT_ftime_report, options::OPT_ftime_report_EQ, options::OPT_funroll_loops, options::OPT_fno_unroll_loops, options::OPT_fdefer_desc_map, options::OPT_fno_defer_desc_map}); - if (Args.hasArg(clang::driver::options::OPT_fcoarray)) + if (Args.hasArg(options::OPT_fcoarray)) CmdArgs.push_back("-fcoarray"); } diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index b17b76233ad30..70e66a2f5c3e7 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -13,8 +13,8 @@ #include "clang/Config/config.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/VirtualFileSystem.h" @@ -404,7 +404,7 @@ void FreeBSD::AddClangSystemIncludeArgs( llvm::opt::ArgStringList &CC1Args) const { const Driver &D = getDriver(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 507cc03b27513..9edfc4de3d602 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -12,8 +12,8 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/MultilibBuilder.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -344,7 
+344,7 @@ Tool *Fuchsia::buildStaticLibTool() const { ToolChain::RuntimeLibType Fuchsia::GetRuntimeLibType(const ArgList &Args) const { - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_rtlib_EQ)) { + if (Arg *A = Args.getLastArg(options::OPT_rtlib_EQ)) { StringRef Value = A->getValue(); if (Value != "compiler-rt") getDriver().Diag(clang::diag::err_drv_invalid_rtlib_name) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index b7cd5f6be14a6..d56cb02e14ca9 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -20,9 +20,9 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/MultilibBuilder.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/ArgList.h" @@ -2104,7 +2104,7 @@ Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) { static llvm::StringRef getGCCToolchainDir(const ArgList &Args, llvm::StringRef SysRoot) { - const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_toolchain); + const Arg *A = Args.getLastArg(options::OPT_gcc_toolchain); if (A) return A->getValue(); @@ -2157,8 +2157,7 @@ void Generic_GCC::GCCInstallationDetector::init( CandidateBiarchTripleAliases); // If --gcc-install-dir= is specified, skip filesystem detection. - if (const Arg *A = - Args.getLastArg(clang::driver::options::OPT_gcc_install_dir_EQ); + if (const Arg *A = Args.getLastArg(options::OPT_gcc_install_dir_EQ); A && A->getValue()[0]) { StringRef InstallDir = A->getValue(); if (!ScanGCCForMultilibs(TargetTriple, Args, InstallDir, false)) { @@ -2181,8 +2180,7 @@ void Generic_GCC::GCCInstallationDetector::init( // If --gcc-triple is specified use this instead of trying to // auto-detect a triple. 
- if (const Arg *A = - Args.getLastArg(clang::driver::options::OPT_gcc_triple_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_gcc_triple_EQ)) { StringRef GCCTriple = A->getValue(); CandidateTripleAliases.clear(); CandidateTripleAliases.push_back(GCCTriple); diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index d0d2d2e34b602..b1c30beae1d35 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -15,8 +15,8 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/TargetParser/TargetParser.h" diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp index fb738577c4c44..44265e8a8b95f 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.cpp +++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp @@ -12,7 +12,7 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -213,7 +213,7 @@ HIPSPVToolChain::getDeviceLibs( // Find device libraries in --hip-device-lib-path and HIP_DEVICE_LIB_PATH. auto HipDeviceLibPathArgs = DriverArgs.getAllArgValues( // --hip-device-lib-path is alias to this option. 
- clang::driver::options::OPT_rocm_device_lib_path_EQ); + options::OPT_rocm_device_lib_path_EQ); for (auto Path : HipDeviceLibPathArgs) LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index 732403e69a075..1af2ae6470f1e 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -9,7 +9,7 @@ #include "HIPUtility.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/Archive.h" diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 9f8b676fc7dc2..084f51721315c 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -11,7 +11,7 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" diff --git a/clang/lib/Driver/ToolChains/Hurd.cpp b/clang/lib/Driver/ToolChains/Hurd.cpp index 43121233ea7d0..53ee4d4c0cbde 100644 --- a/clang/lib/Driver/ToolChains/Hurd.cpp +++ b/clang/lib/Driver/ToolChains/Hurd.cpp @@ -10,7 +10,7 @@ #include "clang/Config/config.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" @@ -168,7 +168,7 @@ void Hurd::AddClangSystemIncludeArgs(const ArgList &DriverArgs, const Driver &D = getDriver(); std::string SysRoot = computeSysRoot(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) 
return; if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index f452109134171..f93725c8c9887 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -16,8 +16,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Distro.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Path.h" @@ -731,10 +731,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, const Driver &D = getDriver(); std::string SysRoot = computeSysRoot(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; - if (DriverArgs.hasArg(clang::driver::options::OPT_fopenmp)) { + if (DriverArgs.hasArg(clang::options::OPT_fopenmp)) { // Look for system files in our compiler AOMP/include dir first addSystemInclude(DriverArgs, CC1Args, DriverArgs.MakeArgString(D.Dir + "/../include")); diff --git a/clang/lib/Driver/ToolChains/MSP430.cpp b/clang/lib/Driver/ToolChains/MSP430.cpp index 9eca1ad5f2865..3cc56bb7e832e 100644 --- a/clang/lib/Driver/ToolChains/MSP430.cpp +++ b/clang/lib/Driver/ToolChains/MSP430.cpp @@ -12,7 +12,7 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Multilib.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 0d4bbabb9bb8a..e9972cb24c245 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -12,8 +12,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" 
-#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/ConvertUTF.h" diff --git a/clang/lib/Driver/ToolChains/Managarm.cpp b/clang/lib/Driver/ToolChains/Managarm.cpp index da4a9072317f4..1bbabdfc631b8 100644 --- a/clang/lib/Driver/ToolChains/Managarm.cpp +++ b/clang/lib/Driver/ToolChains/Managarm.cpp @@ -11,8 +11,8 @@ #include "clang/Config/config.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" @@ -136,7 +136,7 @@ void Managarm::AddClangSystemIncludeArgs(const ArgList &DriverArgs, const Driver &D = getDriver(); std::string SysRoot = computeSysRoot(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index bd0e40ae3d7ad..0e8eeeb0523ce 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -12,8 +12,8 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" diff --git a/clang/lib/Driver/ToolChains/MipsLinux.cpp b/clang/lib/Driver/ToolChains/MipsLinux.cpp index 7dd3936613296..58d6b5031f536 100644 --- a/clang/lib/Driver/ToolChains/MipsLinux.cpp +++ b/clang/lib/Driver/ToolChains/MipsLinux.cpp @@ -9,7 +9,7 @@ #include "MipsLinux.h" #include "Arch/Mips.h" #include "clang/Driver/Driver.h" -#include 
"clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -38,7 +38,7 @@ MipsLLVMToolChain::MipsLLVMToolChain(const Driver &D, void MipsLLVMToolChain::AddClangSystemIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; const Driver &D = getDriver(); diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index 8db00deeb80df..ea722b59853d6 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -14,8 +14,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/VirtualFileSystem.h" @@ -466,7 +466,7 @@ void NetBSD::AddClangSystemIncludeArgs( llvm::opt::ArgStringList &CC1Args) const { const Driver &D = getDriver(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp index 00991504e97a8..607eb714f85dc 100644 --- a/clang/lib/Driver/ToolChains/OHOS.cpp +++ b/clang/lib/Driver/ToolChains/OHOS.cpp @@ -12,8 +12,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/FileSystem.h" @@ -174,7 +174,7 @@ OHOS::OHOS(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) 
ToolChain::RuntimeLibType OHOS::GetRuntimeLibType( const ArgList &Args) const { - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_rtlib_EQ)) { + if (Arg *A = Args.getLastArg(options::OPT_rtlib_EQ)) { StringRef Value = A->getValue(); if (Value != "compiler-rt") getDriver().Diag(clang::diag::err_drv_invalid_rtlib_name) diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 8f589186af343..5e7b4f1a664e6 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -13,8 +13,8 @@ #include "clang/Config/config.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" @@ -315,7 +315,7 @@ void OpenBSD::AddClangSystemIncludeArgs( llvm::opt::ArgStringList &CC1Args) const { const Driver &D = getDriver(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { diff --git a/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp b/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp index 8d381c4f14371..76180431ee682 100644 --- a/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp @@ -8,7 +8,7 @@ #include "PPCFreeBSD.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Support/Path.h" using namespace clang::driver::toolchains; @@ -16,7 +16,7 @@ using namespace llvm::opt; void PPCFreeBSDToolChain::AddClangSystemIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (!DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) && + if (!DriverArgs.hasArg(options::OPT_nostdinc) && !DriverArgs.hasArg(options::OPT_nobuiltininc)) { const 
Driver &D = getDriver(); SmallString<128> P(D.ResourceDir); diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp index 768214e416bd7..672ebd5b7b98d 100644 --- a/clang/lib/Driver/ToolChains/PPCLinux.cpp +++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp @@ -8,7 +8,7 @@ #include "PPCLinux.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -58,7 +58,7 @@ PPCLinuxToolChain::PPCLinuxToolChain(const Driver &D, void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (!DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) && + if (!DriverArgs.hasArg(options::OPT_nostdinc) && !DriverArgs.hasArg(options::OPT_nobuiltininc)) { const Driver &D = getDriver(); SmallString<128> P(D.ResourceDir); diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 34ec65ae59602..6fe18aa4cceba 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -11,8 +11,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" diff --git a/clang/lib/Driver/ToolChains/SPIRV.cpp b/clang/lib/Driver/ToolChains/SPIRV.cpp index afb9e63b4b348..22aad7e3f49d7 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.cpp +++ b/clang/lib/Driver/ToolChains/SPIRV.cpp @@ -10,7 +10,7 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/InputInfo.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" using namespace clang::driver; using namespace clang::driver::toolchains; diff --git 
a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 0232b047a6c4b..85859f344b491 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -20,7 +20,7 @@ SYCLInstallationDetector::SYCLInstallationDetector( void SYCLInstallationDetector::addSYCLIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nobuiltininc)) + if (DriverArgs.hasArg(options::OPT_nobuiltininc)) return; // Add the SYCL header search locations in the specified order. diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index 64c7d1ceb3a36..ad0f41144f393 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -13,9 +13,9 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/ToolChain.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" @@ -360,7 +360,7 @@ void Solaris::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { const Driver &D = getDriver(); - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) diff --git a/clang/lib/Driver/ToolChains/UEFI.cpp b/clang/lib/Driver/ToolChains/UEFI.cpp index d2be147c7b9f6..7732e37f8061d 100644 --- a/clang/lib/Driver/ToolChains/UEFI.cpp +++ b/clang/lib/Driver/ToolChains/UEFI.cpp @@ -11,8 +11,8 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Options/Options.h" #include "llvm/Option/Arg.h" #include 
"llvm/Option/ArgList.h" #include "llvm/Support/VirtualFileSystem.h" diff --git a/clang/lib/Driver/ToolChains/VEToolchain.cpp b/clang/lib/Driver/ToolChains/VEToolchain.cpp index ad9129046c3e1..78509bcdae0fe 100644 --- a/clang/lib/Driver/ToolChains/VEToolchain.cpp +++ b/clang/lib/Driver/ToolChains/VEToolchain.cpp @@ -10,7 +10,7 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" #include // ::getenv @@ -78,7 +78,7 @@ bool VEToolChain::hasBlocksRuntime() const { return false; } void VEToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; if (DriverArgs.hasArg(options::OPT_nobuiltininc) && @@ -117,7 +117,7 @@ void VEToolChain::addClangTargetOptions(const ArgList &DriverArgs, void VEToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) || + if (DriverArgs.hasArg(options::OPT_nostdinc) || DriverArgs.hasArg(options::OPT_nostdlibinc) || DriverArgs.hasArg(options::OPT_nostdincxx)) return; diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 5054868b5ff4d..15c6f19e87fee 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -12,7 +12,7 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Config/llvm-config.h" // for LLVM_VERSION_STRING #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" @@ -297,7 +297,7 @@ bool WebAssembly::HasNativeLLVMSupport() const { 
return true; } void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args, Action::OffloadKind) const { - if (!DriverArgs.hasFlag(clang::driver::options::OPT_fuse_init_array, + if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, true)) CC1Args.push_back("-fno-use-init-array"); @@ -472,7 +472,7 @@ WebAssembly::GetCXXStdlibType(const ArgList &Args) const { void WebAssembly::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; const Driver &D = getDriver(); diff --git a/clang/lib/Driver/ToolChains/XCore.cpp b/clang/lib/Driver/ToolChains/XCore.cpp index 6a2a75cb99739..dd26c11affffb 100644 --- a/clang/lib/Driver/ToolChains/XCore.cpp +++ b/clang/lib/Driver/ToolChains/XCore.cpp @@ -10,7 +10,7 @@ #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include // ::getenv @@ -113,7 +113,7 @@ bool XCoreToolChain::hasBlocksRuntime() const { return false; } void XCoreToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) || + if (DriverArgs.hasArg(options::OPT_nostdinc) || DriverArgs.hasArg(options::OPT_nostdlibinc)) return; if (const char *cl_include_dir = getenv("XCC_C_INCLUDE_PATH")) { @@ -137,7 +137,7 @@ void XCoreToolChain::addClangTargetOptions(const ArgList &DriverArgs, void XCoreToolChain::AddClangCXXStdlibIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) || + if (DriverArgs.hasArg(options::OPT_nostdinc) || DriverArgs.hasArg(options::OPT_nostdlibinc) || DriverArgs.hasArg(options::OPT_nostdincxx)) return; diff --git 
a/clang/lib/Driver/ToolChains/ZOS.cpp b/clang/lib/Driver/ToolChains/ZOS.cpp index 9a3c45323a3cf..eac8f623f9a50 100644 --- a/clang/lib/Driver/ToolChains/ZOS.cpp +++ b/clang/lib/Driver/ToolChains/ZOS.cpp @@ -9,7 +9,7 @@ #include "ZOS.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/WithColor.h" diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp index 0325296f84b19..4c2d11751a363 100644 --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -8,8 +8,8 @@ #include "clang/Driver/XRayArgs.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/ToolChain.h" +#include "clang/Options/Options.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/SpecialCaseList.h" diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index a916667208845..dac9e0d26f393 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -52,6 +52,7 @@ add_clang_library(clangFrontend clangAST clangBasic clangDriver + clangOptions clangEdit clangLex clangParse diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 66759d363d531..0782dc1b585c3 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -27,7 +27,6 @@ #include "clang/Basic/XRayInstr.h" #include "clang/Config/config.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/CommandLineSourceLoc.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -38,6 +37,7 @@ #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" #include 
"clang/Lex/PreprocessorOptions.h" +#include "clang/Options/Options.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ModuleFileExtension.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" @@ -255,7 +255,7 @@ CowCompilerInvocation::getMutPreprocessorOutputOpts() { using ArgumentConsumer = CompilerInvocation::ArgumentConsumer; #define OPTTABLE_STR_TABLE_CODE -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTTABLE_STR_TABLE_CODE static llvm::StringRef lookupStrInTable(unsigned Offset) { @@ -263,7 +263,7 @@ static llvm::StringRef lookupStrInTable(unsigned Offset) { } #define SIMPLE_ENUM_VALUE_TABLE -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef SIMPLE_ENUM_VALUE_TABLE static std::optional normalizeSimpleFlag(OptSpecifier Opt, @@ -989,7 +989,7 @@ static void GenerateAnalyzerArgs(const AnalyzerOptions &Opts, #define ANALYZER_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef ANALYZER_OPTION_WITH_MARSHALLING if (Opts.AnalysisConstraintsOpt != RangeConstraintsModel) { @@ -1076,7 +1076,7 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, #define ANALYZER_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef ANALYZER_OPTION_WITH_MARSHALLING if (Arg *A = Args.getLastArg(OPT_analyzer_constraints)) { @@ -1583,7 +1583,7 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, #define CODEGEN_OPTION_WITH_MARSHALLING(...) 
\ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef CODEGEN_OPTION_WITH_MARSHALLING if (Opts.OptimizationLevel > 0) { @@ -1888,7 +1888,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, #define CODEGEN_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef CODEGEN_OPTION_WITH_MARSHALLING // At O0 we want to fully disable inlining outside of cases marked with @@ -2379,7 +2379,7 @@ static void GenerateDependencyOutputArgs(const DependencyOutputOptions &Opts, const DependencyOutputOptions &DependencyOutputOpts = Opts; #define DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING if (Opts.ShowIncludesDest != ShowIncludesDestination::None) @@ -2414,7 +2414,7 @@ static bool ParseDependencyOutputArgs(DependencyOutputOptions &Opts, DependencyOutputOptions &DependencyOutputOpts = Opts; #define DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING if (Args.hasArg(OPT_show_includes)) { @@ -2542,7 +2542,7 @@ static void GenerateFileSystemArgs(const FileSystemOptions &Opts, #define FILE_SYSTEM_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef FILE_SYSTEM_OPTION_WITH_MARSHALLING } @@ -2554,7 +2554,7 @@ static bool ParseFileSystemArgs(FileSystemOptions &Opts, const ArgList &Args, #define FILE_SYSTEM_OPTION_WITH_MARSHALLING(...) 
\ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef FILE_SYSTEM_OPTION_WITH_MARSHALLING return Diags.getNumErrors() == NumErrorsBefore; @@ -2565,7 +2565,7 @@ static void GenerateMigratorArgs(const MigratorOptions &Opts, const MigratorOptions &MigratorOpts = Opts; #define MIGRATOR_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef MIGRATOR_OPTION_WITH_MARSHALLING } @@ -2577,7 +2577,7 @@ static bool ParseMigratorArgs(MigratorOptions &Opts, const ArgList &Args, #define MIGRATOR_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef MIGRATOR_OPTION_WITH_MARSHALLING return Diags.getNumErrors() == NumErrorsBefore; @@ -2589,7 +2589,7 @@ void CompilerInvocationBase::GenerateDiagnosticArgs( const DiagnosticOptions *DiagnosticOpts = &Opts; #define DIAG_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef DIAG_OPTION_WITH_MARSHALLING if (!Opts.DiagnosticSerializationFile.empty()) @@ -2694,7 +2694,7 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, #define DIAG_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, *Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef DIAG_OPTION_WITH_MARSHALLING llvm::sys::Process::UseANSIEscapeCodes(Opts.UseANSIEscapeCodes); @@ -2844,7 +2844,7 @@ static void GenerateFrontendArgs(const FrontendOptions &Opts, const FrontendOptions &FrontendOpts = Opts; #define FRONTEND_OPTION_WITH_MARSHALLING(...) 
\ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef FRONTEND_OPTION_WITH_MARSHALLING std::optional ProgramActionOpt = @@ -3014,7 +3014,7 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, #define FRONTEND_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef FRONTEND_OPTION_WITH_MARSHALLING Opts.ProgramAction = frontend::ParseSyntaxOnly; @@ -3296,7 +3296,7 @@ static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, const HeaderSearchOptions *HeaderSearchOpts = &Opts; #define HEADER_SEARCH_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef HEADER_SEARCH_OPTION_WITH_MARSHALLING if (Opts.UseLibcxx) @@ -3411,7 +3411,7 @@ static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, #define HEADER_SEARCH_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef HEADER_SEARCH_OPTION_WITH_MARSHALLING if (const Arg *A = Args.getLastArg(OPT_stdlib_EQ)) @@ -3744,7 +3744,7 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts, #define LANG_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING // The '-fcf-protection=' option is generated by CodeGenOpts generator. @@ -4153,7 +4153,7 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, #define LANG_OPTION_WITH_MARSHALLING(...) 
\ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { @@ -4874,7 +4874,7 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts, #define PREPROCESSOR_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef PREPROCESSOR_OPTION_WITH_MARSHALLING if (Opts.PCHWithHdrStop && !Opts.PCHWithHdrStopCreate) @@ -4948,7 +4948,7 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, #define PREPROCESSOR_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef PREPROCESSOR_OPTION_WITH_MARSHALLING Opts.PCHWithHdrStop = Args.hasArg(OPT_pch_through_hdrstop_create) || @@ -5041,7 +5041,7 @@ GeneratePreprocessorOutputArgs(const PreprocessorOutputOptions &Opts, #define PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING(...) \ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING bool Generate_dM = isStrictlyPreprocessorAction(Action) && !Opts.ShowCPP; @@ -5062,7 +5062,7 @@ static bool ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts, #define PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING Opts.ShowCPP = isStrictlyPreprocessorAction(Action) && !Args.hasArg(OPT_dM); @@ -5077,7 +5077,7 @@ static void GenerateTargetArgs(const TargetOptions &Opts, const TargetOptions *TargetOpts = &Opts; #define TARGET_OPTION_WITH_MARSHALLING(...) 
\ GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef TARGET_OPTION_WITH_MARSHALLING if (!Opts.SDKVersion.empty()) @@ -5096,7 +5096,7 @@ static bool ParseTargetArgs(TargetOptions &Opts, ArgList &Args, #define TARGET_OPTION_WITH_MARSHALLING(...) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef TARGET_OPTION_WITH_MARSHALLING if (Arg *A = Args.getLastArg(options::OPT_target_sdk_version_EQ)) { diff --git a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp index 99212b81fe064..73b81ed906808 100644 --- a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp @@ -14,11 +14,11 @@ #include "clang/Driver/Action.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/Utils.h" +#include "clang/Options/Options.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Option/ArgList.h" @@ -61,11 +61,11 @@ clang::createInvocation(ArrayRef ArgList, if (!C) return nullptr; - if (C->getArgs().hasArg(driver::options::OPT_fdriver_only)) + if (C->getArgs().hasArg(options::OPT_fdriver_only)) return nullptr; // Just print the cc1 options if -### was present. 
- if (C->getArgs().hasArg(driver::options::OPT__HASH_HASH_HASH)) { + if (C->getArgs().hasArg(options::OPT__HASH_HASH_HASH)) { C->getJobs().Print(llvm::errs(), "\n", true); return nullptr; } diff --git a/clang/lib/FrontendTool/CMakeLists.txt b/clang/lib/FrontendTool/CMakeLists.txt index 061e54c3e62d0..66213f76eb968 100644 --- a/clang/lib/FrontendTool/CMakeLists.txt +++ b/clang/lib/FrontendTool/CMakeLists.txt @@ -7,6 +7,7 @@ set(link_libs clangBasic clangCodeGen clangDriver + clangOptions clangExtractAPI clangFrontend clangRewriteFrontend diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index c8aad4daa1c10..e571193c6a9c8 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -13,7 +13,6 @@ #include "clang/CodeGen/CodeGenAction.h" #include "clang/Config/config.h" -#include "clang/Driver/Options.h" #include "clang/ExtractAPI/FrontendActions.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" @@ -22,6 +21,7 @@ #include "clang/Frontend/FrontendPluginRegistry.h" #include "clang/Frontend/Utils.h" #include "clang/FrontendTool/Utils.h" +#include "clang/Options/Options.h" #include "clang/Rewrite/Frontend/FrontendActions.h" #include "clang/StaticAnalyzer/Frontend/AnalyzerHelpFlags.h" #include "clang/StaticAnalyzer/Frontend/FrontendActions.h" @@ -215,11 +215,11 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) { // Honor -help. 
if (Clang->getFrontendOpts().ShowHelp) { - driver::getDriverOptTable().printHelp( + getDriverOptTable().printHelp( llvm::outs(), "clang -cc1 [options] file...", "LLVM 'Clang' Compiler: http://clang.llvm.org", /*ShowHidden=*/false, /*ShowAllAliases=*/false, - llvm::opt::Visibility(driver::options::CC1Option)); + llvm::opt::Visibility(options::CC1Option)); return true; } diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 76338065d2231..7764fa7dc92b9 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -33,7 +33,6 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Job.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" @@ -43,6 +42,7 @@ #include "clang/Interpreter/Interpreter.h" #include "clang/Interpreter/Value.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Options/Options.h" #include "clang/Sema/Lookup.h" #include "clang/Serialization/ObjectFilePCHContainerReader.h" #include "llvm/ExecutionEngine/JITSymbol.h" @@ -185,7 +185,7 @@ IncrementalCompilerBuilder::create(std::string TT, llvm::ArrayRef RF = llvm::ArrayRef(ClangArgv); std::unique_ptr Compilation(Driver.BuildCompilation(RF)); - if (Compilation->getArgs().hasArg(driver::options::OPT_v)) + if (Compilation->getArgs().hasArg(options::OPT_v)) Compilation->getJobs().Print(llvm::errs(), "\n", /*Quote=*/false); auto ErrOrCC1Args = GetCC1Arguments(&Diags, Compilation.get()); diff --git a/clang/lib/Interpreter/InterpreterUtils.h b/clang/lib/Interpreter/InterpreterUtils.h index fbf9814b0d4a7..4efe8b9fbc6cc 100644 --- a/clang/lib/Interpreter/InterpreterUtils.h +++ b/clang/lib/Interpreter/InterpreterUtils.h @@ -21,11 +21,11 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Job.h" -#include "clang/Driver/Options.h" #include 
"clang/Driver/Tool.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Options/Options.h" #include "clang/Sema/Lookup.h" #include "llvm/IR/Module.h" diff --git a/clang/lib/Options/CMakeLists.txt b/clang/lib/Options/CMakeLists.txt new file mode 100644 index 0000000000000..a762e9918b41c --- /dev/null +++ b/clang/lib/Options/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_LINK_COMPONENTS + Option + Support +) + +add_clang_library(clangOptions + DriverOptions.cpp + OptionUtils.cpp + + DEPENDS + ClangDriverOptions + # These generated headers are included transitively. + target_parser_gen + + LINK_LIBS + clangBasic + ${system_libs} +) diff --git a/clang/lib/Driver/DriverOptions.cpp b/clang/lib/Options/DriverOptions.cpp similarity index 76% rename from clang/lib/Driver/DriverOptions.cpp rename to clang/lib/Options/DriverOptions.cpp index cde1f8989935b..d91e9291fb2f6 100644 --- a/clang/lib/Driver/DriverOptions.cpp +++ b/clang/lib/Options/DriverOptions.cpp @@ -6,33 +6,32 @@ // //===----------------------------------------------------------------------===// -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/OptTable.h" #include -using namespace clang::driver; -using namespace clang::driver::options; +using namespace clang::options; using namespace llvm::opt; #define OPTTABLE_STR_TABLE_CODE -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTTABLE_STR_TABLE_CODE #define OPTTABLE_VALUES_CODE -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTTABLE_VALUES_CODE #define OPTTABLE_PREFIXES_TABLE_CODE -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTTABLE_PREFIXES_TABLE_CODE #define OPTTABLE_PREFIXES_UNION_CODE -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTTABLE_PREFIXES_UNION_CODE static constexpr 
OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTION }; @@ -44,9 +43,9 @@ class DriverOptTable : public PrecomputedOptTable { : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, OptionPrefixesUnion) {} }; -} +} // anonymous namespace -const llvm::opt::OptTable &clang::driver::getDriverOptTable() { +const llvm::opt::OptTable &clang::getDriverOptTable() { static DriverOptTable Table; return Table; } diff --git a/clang/lib/Driver/OptionUtils.cpp b/clang/lib/Options/OptionUtils.cpp similarity index 97% rename from clang/lib/Driver/OptionUtils.cpp rename to clang/lib/Options/OptionUtils.cpp index 1f36ffc03cab3..fcafd3c83c6b3 100644 --- a/clang/lib/Driver/OptionUtils.cpp +++ b/clang/lib/Options/OptionUtils.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// +#include "clang/Options/OptionUtils.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticDriver.h" -#include "clang/Driver/OptionUtils.h" #include "llvm/Option/ArgList.h" using namespace clang; diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt index fc1f1f9f9d367..faaa53276d0e6 100644 --- a/clang/lib/Tooling/CMakeLists.txt +++ b/clang/lib/Tooling/CMakeLists.txt @@ -40,6 +40,7 @@ add_clang_library(clangTooling clangASTMatchers clangBasic clangDriver + clangOptions clangFormat clangFrontend clangLex diff --git a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp index 28568426a6c48..e9b72388ae4df 100644 --- a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp +++ b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -44,8 +44,8 @@ #include "clang/Basic/LangStandard.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Types.h" +#include "clang/Options/Options.h" #include 
"clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -164,11 +164,11 @@ struct TransferableCommand { // We parse each argument individually so that we can retain the exact // spelling of each argument; re-rendering is lossy for aliased flags. // E.g. in CL mode, /W4 maps to -Wall. - auto &OptTable = clang::driver::getDriverOptTable(); + auto &OptTable = getDriverOptTable(); if (!OldArgs.empty()) Cmd.CommandLine.emplace_back(OldArgs.front()); for (unsigned Pos = 1; Pos < OldArgs.size();) { - using namespace driver::options; + using namespace options; const unsigned OldPos = Pos; std::unique_ptr Arg(OptTable.ParseOneArg( @@ -296,14 +296,14 @@ struct TransferableCommand { // Try to interpret the argument as a type specifier, e.g. '-x'. std::optional tryParseTypeArg(const llvm::opt::Arg &Arg) { const llvm::opt::Option &Opt = Arg.getOption(); - using namespace driver::options; + using namespace options; if (ClangCLMode) { if (Opt.matches(OPT__SLASH_TC) || Opt.matches(OPT__SLASH_Tc)) return types::TY_C; if (Opt.matches(OPT__SLASH_TP) || Opt.matches(OPT__SLASH_Tp)) return types::TY_CXX; } else { - if (Opt.matches(driver::options::OPT_x)) + if (Opt.matches(options::OPT_x)) return types::lookupTypeForTypeSpecifier(Arg.getValue()); } return std::nullopt; @@ -311,7 +311,7 @@ struct TransferableCommand { // Try to interpret the argument as '-std='. std::optional tryParseStdArg(const llvm::opt::Arg &Arg) { - using namespace driver::options; + using namespace options; if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ)) { // "c++latest" is not a recognized LangStandard, but it's accepted by // the clang driver in CL mode. 
diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index e8eef5ed9c9fa..1f6a5c94601fc 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -21,7 +21,6 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Job.h" -#include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" #include "clang/Frontend/ASTUnit.h" @@ -32,6 +31,7 @@ #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Options/Options.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/ArrayRef.h" @@ -270,17 +270,15 @@ void addTargetAndModeForProgramName(std::vector &CommandLine, StringRef InvokedAs) { if (CommandLine.empty() || InvokedAs.empty()) return; - const auto &Table = driver::getDriverOptTable(); + const auto &Table = getDriverOptTable(); // --target=X - StringRef TargetOPT = - Table.getOption(driver::options::OPT_target).getPrefixedName(); + StringRef TargetOPT = Table.getOption(options::OPT_target).getPrefixedName(); // -target X StringRef TargetOPTLegacy = - Table.getOption(driver::options::OPT_target_legacy_spelling) - .getPrefixedName(); + Table.getOption(options::OPT_target_legacy_spelling).getPrefixedName(); // --driver-mode=X StringRef DriverModeOPT = - Table.getOption(driver::options::OPT_driver_mode).getPrefixedName(); + Table.getOption(options::OPT_driver_mode).getPrefixedName(); auto TargetMode = driver::ToolChain::getTargetAndModeFromProgramName(InvokedAs); // No need to search for target args if we don't have a target/mode to insert. 
diff --git a/clang/test/CIR/CodeGen/cxx-rewritten-binary-operator.cpp b/clang/test/CIR/CodeGen/cxx-rewritten-binary-operator.cpp new file mode 100644 index 0000000000000..ac4cac429cb0f --- /dev/null +++ b/clang/test/CIR/CodeGen/cxx-rewritten-binary-operator.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +struct HasOpEq { + bool operator==(const HasOpEq &) const; +}; + +void cxx_rewritten_binary_operator_scalar_expr() { + HasOpEq a; + HasOpEq b; + bool neq = a != b; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !rec_HasOpEq, !cir.ptr, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !rec_HasOpEq, !cir.ptr, ["b"] +// CIR: %[[NEQ_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr, ["neq", init] +// CIR: %[[EQ:.*]] = cir.call @_ZNK7HasOpEqeqERKS_(%[[A_ADDR]], %[[B_ADDR]]) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CIR: %[[NEQ:.*]] = cir.unary(not, %[[EQ]]) : !cir.bool, !cir.bool +// CIR: cir.store{{.*}} %[[NEQ]], %[[NEQ_ADDR]] : !cir.bool, !cir.ptr + +// LLVM: %[[A_ADDR:.*]] = alloca %struct.HasOpEq, i64 1, align 1 +// LLVM: %[[B_ADDR:.*]] = alloca %struct.HasOpEq, i64 1, align 1 +// LLVM: %[[NEQ_ADDR:.*]] = alloca i8, i64 1, align 1 +// LLVM: %[[EQ:.*]] = call i1 @_ZNK7HasOpEqeqERKS_(ptr %[[A_ADDR]], ptr %[[B_ADDR]]) +// LLVM: %[[NEQ_I1:.*]] = xor i1 %[[EQ]], true +// LLVM: %[[NEQ:.*]] = zext i1 %[[NEQ_I1]] to i8 +// LLVM: store i8 %[[NEQ]], ptr %[[NEQ_ADDR]], align 1 + +// OGCG: %[[A_ADDR:.*]] = alloca %struct.HasOpEq, align 1 +// OGCG: %[[B_ADDR:.*]] = alloca %struct.HasOpEq, align 1 +// OGCG: 
%[[NEQ_ADDR:.*]] = alloca i8, align 1 +// OGCG: %[[EQ:.*]] = call {{.*}} zeroext i1 @_ZNK7HasOpEqeqERKS_(ptr {{.*}} %[[A_ADDR]], ptr {{.*}} %[[B_ADDR]]) +// OGCG: %[[NEQ_I1:.*]] = xor i1 %[[EQ]], true +// OGCG: %[[NEQ:.*]] = zext i1 %[[NEQ_I1]] to i8 +// OGCG: store i8 %[[NEQ]], ptr %[[NEQ_ADDR]], align 1 diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index de4cb2fd0b055..ce8e2f04e487c 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -109,6 +109,9 @@ __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) { // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> return _mm256_alignr_epi8(a, b, 2); } +TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 1, 2, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 17, 18)); +TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 16), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); +TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 32), 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test2_mm256_alignr_epi8 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 0b73c7b14d869..2749dc5741b58 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -3057,6 +3057,9 @@ __m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B){ // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> return _mm512_alignr_epi8(__A, __B, 2); } +TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 1, 2, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 17, 18, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 33, 34, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 49, 50)); +TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 
97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 16), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64)); +TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m512i __B){ // CHECK-LABEL: test_mm512_mask_alignr_epi8 @@ -3064,6 +3067,7 @@ __m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m5 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_alignr_epi8(__W, __U, __A, __B, 2); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_alignr_epi8(((__m512i)(__v64qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), 
(__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127)); __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){ // CHECK-LABEL: test_mm512_maskz_alignr_epi8 @@ -3071,6 +3075,7 @@ __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){ // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_alignr_epi8(__U, __A, __B, 2); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_alignr_epi8((__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 28e6afbc24564..7a5af2dc8742f 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3538,6 +3538,7 @@ __m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128 // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_alignr_epi8(((__m128i)(__v16qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127)); __m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_alignr_epi8 @@ -3545,6 +3546,7 @@ __m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_alignr_epi8(__U, __A, __B, 2); } +TEST_CONSTEXPR(match_v16qi( _mm_maskz_alignr_epi8((__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}),2), 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_alignr_epi8 @@ -3552,6 +3554,7 @@ __m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return 
_mm256_mask_alignr_epi8(__W, __U, __A, __B, 2); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_alignr_epi8(((__m256i)(__v32qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask32)0xf000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 63, 64, 17, 18)); __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_alignr_epi8 @@ -3559,6 +3562,7 @@ __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_alignr_epi8(__U, __A, __B, 2); } +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_alignr_epi8((__mmask32)0xf000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 64, 17, 18)); __m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dbsad_epu8 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 273138063a1b1..ad8a81c61ad43 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -102,6 +102,8 @@ __m64 test_mm_alignr_pi8(__m64 a, __m64 b) { // CHECK: shufflevector <16 x i8> 
{{%.*}}, <16 x i8> zeroinitializer, <16 x i32> return _mm_alignr_pi8(a, b, 2); } +TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 2), 11, 12, 13, 14, 15, 16, 1, 2)); +TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 16), 0, 0, 0, 0, 0, 0, 0, 0)); __m64 test_mm_and_si64(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_and_si64 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index b7a4a2fe7ccd7..193fa37f65d14 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -48,6 +48,8 @@ __m128i test_mm_alignr_epi8(__m128i a, __m128i b) { // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> return _mm_alignr_epi8(a, b, 2); } +TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2)); +TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test2_mm_alignr_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test2_mm_alignr_epi8 diff --git a/clang/tools/clang-check/CMakeLists.txt b/clang/tools/clang-check/CMakeLists.txt index 5493aa4237aee..1efcc91fcaec9 100644 --- a/clang/tools/clang-check/CMakeLists.txt +++ b/clang/tools/clang-check/CMakeLists.txt @@ -14,6 +14,7 @@ clang_target_link_libraries(clang-check clangBasic clangDriver clangFrontend + clangOptions clangRewriteFrontend clangSerialization clangStaticAnalyzerFrontend diff --git a/clang/tools/clang-check/ClangCheck.cpp 
b/clang/tools/clang-check/ClangCheck.cpp index fa6dd06a1ee58..80255c647b98f 100644 --- a/clang/tools/clang-check/ClangCheck.cpp +++ b/clang/tools/clang-check/ClangCheck.cpp @@ -16,9 +16,9 @@ //===----------------------------------------------------------------------===// #include "clang/AST/ASTConsumer.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/ASTConsumers.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Options/Options.h" #include "clang/Rewrite/Frontend/FixItRewriter.h" #include "clang/Rewrite/Frontend/FrontendActions.h" #include "clang/StaticAnalyzer/Frontend/FrontendActions.h" @@ -34,8 +34,8 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" -using namespace clang::driver; using namespace clang::tooling; +using namespace clang; using namespace llvm; static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); diff --git a/clang/tools/clang-installapi/CMakeLists.txt b/clang/tools/clang-installapi/CMakeLists.txt index 9c0d9dff7dc7f..54bc80486472f 100644 --- a/clang/tools/clang-installapi/CMakeLists.txt +++ b/clang/tools/clang-installapi/CMakeLists.txt @@ -25,6 +25,7 @@ clang_target_link_libraries(clang-installapi clangAST clangInstallAPI clangBasic + clangOptions clangDriver clangFrontend clangTooling diff --git a/clang/tools/clang-installapi/ClangInstallAPI.cpp b/clang/tools/clang-installapi/ClangInstallAPI.cpp index 4e66485343b89..8bef9690ad855 100644 --- a/clang/tools/clang-installapi/ClangInstallAPI.cpp +++ b/clang/tools/clang-installapi/ClangInstallAPI.cpp @@ -35,7 +35,7 @@ using namespace clang; using namespace clang::installapi; -using namespace clang::driver::options; +using namespace clang::options; using namespace llvm::opt; using namespace llvm::MachO; @@ -71,7 +71,7 @@ static bool runFrontend(StringRef ProgName, Twine Label, bool Verbose, static bool run(ArrayRef Args, const char *ProgName) { // Setup Diagnostics engine. 
DiagnosticOptions DiagOpts; - const llvm::opt::OptTable &ClangOpts = clang::driver::getDriverOptTable(); + const llvm::opt::OptTable &ClangOpts = getDriverOptTable(); unsigned MissingArgIndex, MissingArgCount; llvm::opt::InputArgList ParsedArgs = ClangOpts.ParseArgs( ArrayRef(Args).slice(1), MissingArgIndex, MissingArgCount); diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 64324a3f8b010..f484d6f33ad8f 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -26,8 +26,6 @@ using namespace llvm; using namespace llvm::opt; using namespace llvm::MachO; -namespace drv = clang::driver::options; - namespace clang { namespace installapi { @@ -109,7 +107,7 @@ getArgListFromJSON(const StringRef Input, llvm::opt::OptTable *Table, bool Options::processDriverOptions(InputArgList &Args) { // Handle inputs. - for (const StringRef Path : Args.getAllArgValues(drv::OPT_INPUT)) { + for (const StringRef Path : Args.getAllArgValues(options::OPT_INPUT)) { // Assume any input that is not a directory is a filelist. // InstallAPI does not accept multiple directories, so retain the last one. if (FM->getOptionalDirectoryRef(Path)) @@ -120,7 +118,7 @@ bool Options::processDriverOptions(InputArgList &Args) { // Handle output. SmallString OutputPath; - if (auto *Arg = Args.getLastArg(drv::OPT_o)) { + if (auto *Arg = Args.getLastArg(options::OPT_o)) { OutputPath = Arg->getValue(); if (OutputPath != "-") FM->makeAbsolutePath(OutputPath); @@ -132,10 +130,10 @@ bool Options::processDriverOptions(InputArgList &Args) { } // Do basic error checking first for mixing -target and -arch options. 
- auto *ArgArch = Args.getLastArgNoClaim(drv::OPT_arch); - auto *ArgTarget = Args.getLastArgNoClaim(drv::OPT_target); + auto *ArgArch = Args.getLastArgNoClaim(options::OPT_arch); + auto *ArgTarget = Args.getLastArgNoClaim(options::OPT_target); auto *ArgTargetVariant = - Args.getLastArgNoClaim(drv::OPT_darwin_target_variant); + Args.getLastArgNoClaim(options::OPT_darwin_target_variant); if (ArgArch && (ArgTarget || ArgTargetVariant)) { Diags->Report(clang::diag::err_drv_argument_not_allowed_with) << ArgArch->getAsString(Args) @@ -143,7 +141,7 @@ bool Options::processDriverOptions(InputArgList &Args) { return false; } - auto *ArgMinTargetOS = Args.getLastArgNoClaim(drv::OPT_mtargetos_EQ); + auto *ArgMinTargetOS = Args.getLastArgNoClaim(options::OPT_mtargetos_EQ); if ((ArgTarget || ArgTargetVariant) && ArgMinTargetOS) { Diags->Report(clang::diag::err_drv_cannot_mix_options) << ArgTarget->getAsString(Args) << ArgMinTargetOS->getAsString(Args); @@ -152,7 +150,7 @@ bool Options::processDriverOptions(InputArgList &Args) { // Capture target triples first. if (ArgTarget) { - for (const Arg *A : Args.filtered(drv::OPT_target)) { + for (const Arg *A : Args.filtered(options::OPT_target)) { A->claim(); llvm::Triple TargetTriple(A->getValue()); Target TAPITarget = Target(TargetTriple); @@ -168,7 +166,7 @@ bool Options::processDriverOptions(InputArgList &Args) { // Capture target variants. 
DriverOpts.Zippered = ArgTargetVariant != nullptr; - for (Arg *A : Args.filtered(drv::OPT_darwin_target_variant)) { + for (Arg *A : Args.filtered(options::OPT_darwin_target_variant)) { A->claim(); Triple Variant(A->getValue()); if (Variant.getVendor() != Triple::Apple) { @@ -213,7 +211,7 @@ bool Options::processDriverOptions(InputArgList &Args) { DriverOpts.Targets[TAPIVariant] = Variant; } - DriverOpts.Verbose = Args.hasArgNoClaim(drv::OPT_v); + DriverOpts.Verbose = Args.hasArgNoClaim(options::OPT_v); return true; } @@ -407,7 +405,7 @@ bool Options::processOptionList(InputArgList &Args, bool Options::processLinkerOptions(InputArgList &Args) { // Handle required arguments. - if (const Arg *A = Args.getLastArg(drv::OPT_install__name)) + if (const Arg *A = Args.getLastArg(options::OPT_install__name)) LinkerOpts.InstallName = A->getValue(); if (LinkerOpts.InstallName.empty()) { Diags->Report(diag::err_no_install_name); @@ -415,28 +413,29 @@ bool Options::processLinkerOptions(InputArgList &Args) { } // Defaulted or optional arguments. 
- if (auto *Arg = Args.getLastArg(drv::OPT_current__version)) + if (auto *Arg = Args.getLastArg(options::OPT_current__version)) LinkerOpts.CurrentVersion.parse64(Arg->getValue()); - if (auto *Arg = Args.getLastArg(drv::OPT_compatibility__version)) + if (auto *Arg = Args.getLastArg(options::OPT_compatibility__version)) LinkerOpts.CompatVersion.parse64(Arg->getValue()); - if (auto *Arg = Args.getLastArg(drv::OPT_compatibility__version)) + if (auto *Arg = Args.getLastArg(options::OPT_compatibility__version)) LinkerOpts.CompatVersion.parse64(Arg->getValue()); - if (auto *Arg = Args.getLastArg(drv::OPT_umbrella)) + if (auto *Arg = Args.getLastArg(options::OPT_umbrella)) LinkerOpts.ParentUmbrella = Arg->getValue(); - LinkerOpts.IsDylib = Args.hasArg(drv::OPT_dynamiclib); + LinkerOpts.IsDylib = Args.hasArg(options::OPT_dynamiclib); - for (auto *Arg : Args.filtered(drv::OPT_alias_list)) { + for (auto *Arg : Args.filtered(options::OPT_alias_list)) { LinkerOpts.AliasLists.emplace_back(Arg->getValue()); Arg->claim(); } - LinkerOpts.AppExtensionSafe = Args.hasFlag( - drv::OPT_fapplication_extension, drv::OPT_fno_application_extension, - /*Default=*/LinkerOpts.AppExtensionSafe); + LinkerOpts.AppExtensionSafe = + Args.hasFlag(options::OPT_fapplication_extension, + options::OPT_fno_application_extension, + /*Default=*/LinkerOpts.AppExtensionSafe); if (::getenv("LD_NO_ENCRYPT") != nullptr) LinkerOpts.AppExtensionSafe = true; @@ -446,7 +445,7 @@ bool Options::processLinkerOptions(InputArgList &Args) { // Capture library paths. PathSeq LibraryPaths; - for (const Arg *A : Args.filtered(drv::OPT_L)) { + for (const Arg *A : Args.filtered(options::OPT_L)) { LibraryPaths.emplace_back(A->getValue()); A->claim(); } @@ -461,7 +460,7 @@ bool Options::processLinkerOptions(InputArgList &Args) { // invocations. bool Options::processFrontendOptions(InputArgList &Args) { // Capture language mode. 
- if (auto *A = Args.getLastArgNoClaim(drv::OPT_x)) { + if (auto *A = Args.getLastArgNoClaim(options::OPT_x)) { FEOpts.LangMode = llvm::StringSwitch(A->getValue()) .Case("c", clang::Language::C) .Case("c++", clang::Language::CXX) @@ -475,15 +474,15 @@ bool Options::processFrontendOptions(InputArgList &Args) { return false; } } - for (auto *A : Args.filtered(drv::OPT_ObjC, drv::OPT_ObjCXX)) { - if (A->getOption().matches(drv::OPT_ObjC)) + for (auto *A : Args.filtered(options::OPT_ObjC, options::OPT_ObjCXX)) { + if (A->getOption().matches(options::OPT_ObjC)) FEOpts.LangMode = clang::Language::ObjC; else FEOpts.LangMode = clang::Language::ObjCXX; } // Capture Sysroot. - if (const Arg *A = Args.getLastArgNoClaim(drv::OPT_isysroot)) { + if (const Arg *A = Args.getLastArgNoClaim(options::OPT_isysroot)) { SmallString Path(A->getValue()); FM->makeAbsolutePath(Path); if (!FM->getOptionalDirectoryRef(Path)) { @@ -502,13 +501,13 @@ bool Options::processFrontendOptions(InputArgList &Args) { } // Capture system frameworks for all platforms. - for (const Arg *A : Args.filtered(drv::OPT_iframework)) + for (const Arg *A : Args.filtered(options::OPT_iframework)) FEOpts.SystemFwkPaths.emplace_back(A->getValue(), std::optional{}); // Capture framework paths. 
PathSeq FrameworkPaths; - for (const Arg *A : Args.filtered(drv::OPT_F)) + for (const Arg *A : Args.filtered(options::OPT_F)) FrameworkPaths.emplace_back(A->getValue()); if (!FrameworkPaths.empty()) diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index a9fa61f9f75ee..db9e0de9b59e0 100644 --- a/clang/tools/driver/CMakeLists.txt +++ b/clang/tools/driver/CMakeLists.txt @@ -63,6 +63,7 @@ clang_target_link_libraries(clang clangDriver clangFrontend clangFrontendTool + clangOptions clangSerialization ) diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp index 52cffa4ccbe1f..2aef75597fc5f 100644 --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -17,7 +17,6 @@ #include "clang/CodeGen/ObjectFilePCHContainerWriter.h" #include "clang/Config/config.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -25,6 +24,7 @@ #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Frontend/Utils.h" #include "clang/FrontendTool/Utils.h" +#include "clang/Options/Options.h" #include "clang/Serialization/ObjectFilePCHContainerReader.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp index 50da2f8449a22..f13812f2a8383 100644 --- a/clang/tools/driver/cc1as_main.cpp +++ b/clang/tools/driver/cc1as_main.cpp @@ -14,10 +14,10 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Frontend/Utils.h" +#include "clang/Options/Options.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include 
"llvm/ADT/StringSwitch.h" @@ -59,8 +59,7 @@ #include #include using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; +using namespace clang::options; using namespace llvm; using namespace llvm::opt; @@ -688,8 +687,7 @@ int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { getDriverOptTable().printHelp( llvm::outs(), "clang -cc1as [options] file...", "Clang Integrated Assembler", /*ShowHidden=*/false, - /*ShowAllAliases=*/false, - llvm::opt::Visibility(driver::options::CC1AsOption)); + /*ShowAllAliases=*/false, llvm::opt::Visibility(options::CC1AsOption)); return 0; } diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index 6cf64d70c9399..7dd50b9f3e26a 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -18,13 +18,13 @@ #include "clang/Config/config.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" #include "clang/Driver/ToolChain.h" #include "clang/Frontend/ChainedDiagnosticConsumer.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/SerializedDiagnosticPrinter.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Frontend/Utils.h" +#include "clang/Options/Options.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -350,8 +350,8 @@ int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContext) { unsigned NumParallelJobs = getLastArgIntValue(ArgList, options::OPT_parallel_jobs_EQ, 1, Diags); UseNewCC1Process = - ArgList.hasFlag(clang::driver::options::OPT_fno_integrated_cc1, - clang::driver::options::OPT_fintegrated_cc1, + ArgList.hasFlag(clang::options::OPT_fno_integrated_cc1, + clang::options::OPT_fintegrated_cc1, /*Default=*/NumParallelJobs > 1 ? 
true : CLANG_SPAWN_CC1); if (!DiagOpts->DiagnosticSerializationFile.empty()) { diff --git a/clang/unittests/Driver/DXCModeTest.cpp b/clang/unittests/Driver/DXCModeTest.cpp index 62274235c53f5..e0454f190b35a 100644 --- a/clang/unittests/Driver/DXCModeTest.cpp +++ b/clang/unittests/Driver/DXCModeTest.cpp @@ -131,8 +131,8 @@ TEST(DxcModeTest, ValidatorVersionValidation) { TC.TranslateArgs(*DAL, "0", Action::OffloadKind::OFK_None)}; EXPECT_NE(TranslatedArgs, nullptr); if (TranslatedArgs) { - auto *A = TranslatedArgs->getLastArg( - clang::driver::options::OPT_dxil_validator_version); + auto *A = + TranslatedArgs->getLastArg(clang::options::OPT_dxil_validator_version); EXPECT_NE(A, nullptr); if (A) { EXPECT_STREQ(A->getValue(), "1.1"); diff --git a/clang/www/OpenProjects.html b/clang/www/OpenProjects.html index ae0ec1d4d12cb..3e5e84b5b2ed4 100755 --- a/clang/www/OpenProjects.html +++ b/clang/www/OpenProjects.html @@ -38,7 +38,7 @@

Open Clang Projects

  • documenting diagnostic group flags (adding code examples of what is diagnosed, or other relevant information), or
  • -
  • documenting +
  • documenting command line options, or
  • help with completing other missing documentation.
  • diff --git a/flang/docs/CMakeLists.txt b/flang/docs/CMakeLists.txt index 568f942cb4aa6..b183d6add1059 100644 --- a/flang/docs/CMakeLists.txt +++ b/flang/docs/CMakeLists.txt @@ -88,7 +88,7 @@ function (gen_rst_file_from_td output_file td_option source target) endif() get_filename_component(TABLEGEN_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${source}" DIRECTORY) list(APPEND LLVM_TABLEGEN_FLAGS "-I${TABLEGEN_INCLUDE_DIR}") - list(APPEND LLVM_TABLEGEN_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}/../../clang/include/clang/Driver/") + list(APPEND LLVM_TABLEGEN_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}/../../clang/include/clang/Options/") clang_tablegen(Source/${output_file} ${td_option} SOURCE ${source} TARGET ${target}) endfunction() diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md index 3286171bb1499..9953f2252218b 100644 --- a/flang/docs/FlangDriver.md +++ b/flang/docs/FlangDriver.md @@ -76,7 +76,7 @@ will ignore it when used without `Xflang`. As hinted above, `flang` and `flang -fc1` are two separate tools. The fact that these tools are accessed through one binary, `flang`, is just an implementation detail. Each tool has a separate list of options, albeit defined -in the same file: `clang/include/clang/Driver/Options.td`. +in the same file: `clang/include/clang/Options/Options.td`. The separation helps us split various tasks and allows us to implement more specialised tools. In particular, `flang` is not aware of various @@ -112,7 +112,7 @@ in terms of Clang's driver library, `clangDriver`. This approach allows us to: as linkers and assemblers. One implication of this dependency on Clang is that all of Flang's compiler options are defined alongside Clang's options in -`clang/include/clang/Driver/Options.td`. For options that are common for both +`clang/include/clang/Options/Options.td`. For options that are common for both Flang and Clang, the corresponding definitions are shared. 
Internally, a `clangDriver` based compiler driver works by creating actions @@ -242,7 +242,7 @@ Adding a new compiler option in Flang consists of two steps: ### Option Definition All of Flang's compiler and frontend driver options are defined in -`clang/include/clang/Driver/Options.td` in Clang. When adding a new option to +`clang/include/clang/Options/Options.td` in Clang. When adding a new option to Flang, you will either: * extend the existing definition for an option that is already available in one of Clang's drivers (e.g. `clang`), but not yet available in Flang, or @@ -314,7 +314,7 @@ add, you will have to add a dedicated entry in that enum (e.g. `ParseSyntaxOnly` for `-fsyntax-only`) and a corresponding `case` in `ParseFrontendArgs` function in the `CompilerInvocation.cpp` file, e.g.: ```cpp - case clang::driver::options::OPT_fsyntax_only: + case clang::options::OPT_fsyntax_only: opts.programAction = ParseSyntaxOnly; break; ``` diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 7f64d230f7348..4248e3a5461f5 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1110,6 +1110,9 @@ bool IsArraySection(const Expr &expr); // Predicate: does an expression contain constant? bool HasConstant(const Expr &); +// Predicate: Does an expression contain a component +bool HasStructureComponent(const Expr &expr); + // Utilities for attaching the location of the declaration of a symbol // of interest to a message. Handles the case of USE association gracefully. 
parser::Message *AttachDeclaration(parser::Message &, const Symbol &); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 1c4d2daef2a11..353260b2e5c02 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -588,6 +588,8 @@ class ParseTreeDumper { NODE(parser, OmpExpectation) NODE_ENUM(OmpExpectation, Value) NODE(parser, OmpFailClause) + NODE(parser, OmpFallbackModifier) + NODE_ENUM(OmpFallbackModifier, Value) NODE(parser, OmpFromClause) NODE(OmpFromClause, Modifier) NODE(parser, OmpGrainsizeClause) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index cd9429b9631d6..2f6b95b2fa2a8 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4000,6 +4000,17 @@ struct OmpExpectation { WRAPPER_CLASS_BOILERPLATE(OmpExpectation, Value); }; +// Ref: [6.1:tbd] +// +// fallback-modifier -> +// FALLBACK(fallback-mode) // since 6.1 +// fallback-mode -> +// ABORT | DEFAULT_MEM | NULL // since 6.1 +struct OmpFallbackModifier { + ENUM_CLASS(Value, Abort, Default_Mem, Null); + WRAPPER_CLASS_BOILERPLATE(OmpFallbackModifier, Value); +}; + // REF: [5.1:217-220], [5.2:293-294] // // OmpInteropRuntimeIdentifier -> // since 5.2 @@ -4129,9 +4140,8 @@ struct OmpOrderModifier { // // prescriptiveness -> // STRICT // since 5.1 -// FALLBACK // since 6.1 struct OmpPrescriptiveness { - ENUM_CLASS(Value, Strict, Fallback) + ENUM_CLASS(Value, Strict) WRAPPER_CLASS_BOILERPLATE(OmpPrescriptiveness, Value); }; @@ -4512,7 +4522,7 @@ struct OmpDynamicAllocatorsClause { struct OmpDynGroupprivateClause { TUPLE_CLASS_BOILERPLATE(OmpDynGroupprivateClause); - MODIFIER_BOILERPLATE(OmpAccessGroup, OmpPrescriptiveness); + MODIFIER_BOILERPLATE(OmpAccessGroup, OmpFallbackModifier); std::tuple t; }; diff --git a/flang/include/flang/Semantics/openmp-modifiers.h 
b/flang/include/flang/Semantics/openmp-modifiers.h index bfa3aa4939cb1..283bf2a4c895e 100644 --- a/flang/include/flang/Semantics/openmp-modifiers.h +++ b/flang/include/flang/Semantics/openmp-modifiers.h @@ -67,6 +67,7 @@ template const OmpModifierDescriptor &OmpGetDescriptor(); #define DECLARE_DESCRIPTOR(name) \ template <> const OmpModifierDescriptor &OmpGetDescriptor() +DECLARE_DESCRIPTOR(parser::OmpAccessGroup); DECLARE_DESCRIPTOR(parser::OmpAlignment); DECLARE_DESCRIPTOR(parser::OmpAlignModifier); DECLARE_DESCRIPTOR(parser::OmpAllocatorComplexModifier); @@ -82,6 +83,7 @@ DECLARE_DESCRIPTOR(parser::OmpDependenceType); DECLARE_DESCRIPTOR(parser::OmpDeviceModifier); DECLARE_DESCRIPTOR(parser::OmpDirectiveNameModifier); DECLARE_DESCRIPTOR(parser::OmpExpectation); +DECLARE_DESCRIPTOR(parser::OmpFallbackModifier); DECLARE_DESCRIPTOR(parser::OmpInteropPreference); DECLARE_DESCRIPTOR(parser::OmpInteropType); DECLARE_DESCRIPTOR(parser::OmpIterator); diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index bd06acc21e47f..117b2249a9179 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1210,6 +1210,20 @@ bool HasConstant(const Expr &expr) { return HasConstantHelper{}(expr); } +// HasStructureComponent() +struct HasStructureComponentHelper + : public AnyTraverse { + using Base = AnyTraverse; + HasStructureComponentHelper() : Base(*this) {} + using Base::operator(); + + bool operator()(const Component &) const { return true; } +}; + +bool HasStructureComponent(const Expr &expr) { + return HasStructureComponentHelper{}(expr); +} + parser::Message *AttachDeclaration( parser::Message &message, const Symbol &symbol) { const Symbol *unhosted{&symbol}; diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index 2b3bc0e9c2269..bb0b4a39cec9b 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -76,6 +76,7 @@ add_flang_library(flangFrontend CLANG_LIBS clangBasic 
clangDriver + clangOptions ) target_precompile_headers(flangFrontend PRIVATE diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 5eba5e4cc8a53..8842970dde613 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -26,8 +26,8 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" -#include "clang/Driver/OptionUtils.h" -#include "clang/Driver/Options.h" +#include "clang/Options/OptionUtils.h" +#include "clang/Options/Options.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" @@ -82,11 +82,11 @@ static bool parseShowColorsArgs(const llvm::opt::ArgList &args, for (auto *a : args) { const llvm::opt::Option &opt = a->getOption(); - if (opt.matches(clang::driver::options::OPT_fcolor_diagnostics)) { + if (opt.matches(clang::options::OPT_fcolor_diagnostics)) { showColors = Colors_On; - } else if (opt.matches(clang::driver::options::OPT_fno_color_diagnostics)) { + } else if (opt.matches(clang::options::OPT_fno_color_diagnostics)) { showColors = Colors_Off; - } else if (opt.matches(clang::driver::options::OPT_fdiagnostics_color_EQ)) { + } else if (opt.matches(clang::options::OPT_fdiagnostics_color_EQ)) { llvm::StringRef value(a->getValue()); if (value == "always") showColors = Colors_On; @@ -107,15 +107,13 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { unsigned defaultOpt = 0; - if (llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_O_Group)) { - if (a->getOption().matches(clang::driver::options::OPT_O0)) + if (llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_O_Group)) { + if (a->getOption().matches(clang::options::OPT_O0)) return 0; - assert(a->getOption().matches(clang::driver::options::OPT_O)); + assert(a->getOption().matches(clang::options::OPT_O)); - return getLastArgIntValue(args, 
clang::driver::options::OPT_O, defaultOpt, - diags); + return getLastArgIntValue(args, clang::options::OPT_O, defaultOpt, diags); } return defaultOpt; @@ -133,7 +131,7 @@ static bool parseDebugArgs(Fortran::frontend::CodeGenOptions &opts, clang::DiagnosticsEngine &diags) { using DebugInfoKind = llvm::codegenoptions::DebugInfoKind; if (llvm::opt::Arg *arg = - args.getLastArg(clang::driver::options::OPT_debug_info_kind_EQ)) { + args.getLastArg(clang::options::OPT_debug_info_kind_EQ)) { std::optional val = llvm::StringSwitch>(arg->getValue()) .Case("line-tables-only", llvm::codegenoptions::DebugLineTablesOnly) @@ -158,13 +156,13 @@ static bool parseDebugArgs(Fortran::frontend::CodeGenOptions &opts, diags.Report(debugWarning) << arg->getValue(); } opts.DwarfVersion = - getLastArgIntValue(args, clang::driver::options::OPT_dwarf_version_EQ, + getLastArgIntValue(args, clang::options::OPT_dwarf_version_EQ, /*Default=*/0, diags); if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_split_dwarf_file)) + args.getLastArg(clang::options::OPT_split_dwarf_file)) opts.SplitDwarfFile = a->getValue(); if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_split_dwarf_output)) + args.getLastArg(clang::options::OPT_split_dwarf_output)) opts.SplitDwarfOutput = a->getValue(); } return true; @@ -174,7 +172,7 @@ static void parseDoConcurrentMapping(Fortran::frontend::CodeGenOptions &opts, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { llvm::opt::Arg *arg = - args.getLastArg(clang::driver::options::OPT_fdo_concurrent_to_openmp_EQ); + args.getLastArg(clang::options::OPT_fdo_concurrent_to_openmp_EQ); if (!arg) return; @@ -199,7 +197,7 @@ static void parseDoConcurrentMapping(Fortran::frontend::CodeGenOptions &opts, static bool parseVectorLibArg(Fortran::frontend::CodeGenOptions &opts, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { - llvm::opt::Arg *arg = args.getLastArg(clang::driver::options::OPT_fveclib); + 
llvm::opt::Arg *arg = args.getLastArg(clang::options::OPT_fveclib); if (!arg) return true; @@ -237,7 +235,7 @@ parseOptimizationRemark(clang::DiagnosticsEngine &diags, CodeGenOptions::OptRemark result; for (llvm::opt::Arg *a : args) { - if (a->getOption().matches(clang::driver::options::OPT_R_Joined)) { + if (a->getOption().matches(clang::options::OPT_R_Joined)) { llvm::StringRef value = a->getValue(); if (value == remarkOptName) { @@ -274,39 +272,39 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::DiagnosticsEngine &diags) { opts.OptimizationLevel = getOptimizationLevel(args, diags); - if (args.hasFlag(clang::driver::options::OPT_fdebug_pass_manager, - clang::driver::options::OPT_fno_debug_pass_manager, false)) + if (args.hasFlag(clang::options::OPT_fdebug_pass_manager, + clang::options::OPT_fno_debug_pass_manager, false)) opts.DebugPassManager = 1; - if (args.hasFlag(clang::driver::options::OPT_fstack_arrays, - clang::driver::options::OPT_fno_stack_arrays, false)) + if (args.hasFlag(clang::options::OPT_fstack_arrays, + clang::options::OPT_fno_stack_arrays, false)) opts.StackArrays = 1; - if (args.getLastArg(clang::driver::options::OPT_floop_interchange)) + if (args.getLastArg(clang::options::OPT_floop_interchange)) opts.InterchangeLoops = 1; - if (args.getLastArg(clang::driver::options::OPT_fexperimental_loop_fusion)) + if (args.getLastArg(clang::options::OPT_fexperimental_loop_fusion)) opts.FuseLoops = 1; - if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) + if (args.getLastArg(clang::options::OPT_vectorize_loops)) opts.VectorizeLoop = 1; - if (args.getLastArg(clang::driver::options::OPT_vectorize_slp)) + if (args.getLastArg(clang::options::OPT_vectorize_slp)) opts.VectorizeSLP = 1; - if (args.hasFlag(clang::driver::options::OPT_floop_versioning, - clang::driver::options::OPT_fno_loop_versioning, false)) + if (args.hasFlag(clang::options::OPT_floop_versioning, + clang::options::OPT_fno_loop_versioning, false)) 
opts.LoopVersioning = 1; - opts.UnrollLoops = args.hasFlag(clang::driver::options::OPT_funroll_loops, - clang::driver::options::OPT_fno_unroll_loops, + opts.UnrollLoops = args.hasFlag(clang::options::OPT_funroll_loops, + clang::options::OPT_fno_unroll_loops, (opts.OptimizationLevel > 1)); opts.AliasAnalysis = opts.OptimizationLevel > 0; // -mframe-pointer=none/non-leaf/reserved/all option. if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_mframe_pointer_EQ)) { + args.getLastArg(clang::options::OPT_mframe_pointer_EQ)) { std::optional val = llvm::StringSwitch>(a->getValue()) .Case("none", llvm::FramePointerKind::None) @@ -322,7 +320,7 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, opts.setFramePointer(val.value()); } - for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ)) + for (auto *a : args.filtered(clang::options::OPT_fpass_plugin_EQ)) opts.LLVMPassPlugins.push_back(a->getValue()); opts.Reciprocals = clang::driver::tools::parseMRecipOption(diags, args); @@ -331,15 +329,14 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::tools::parseMPreferVectorWidthOption(diags, args); // -fembed-offload-object option - for (auto *a : - args.filtered(clang::driver::options::OPT_fembed_offload_object_EQ)) + for (auto *a : args.filtered(clang::options::OPT_fembed_offload_object_EQ)) opts.OffloadObjects.push_back(a->getValue()); - if (args.hasArg(clang::driver::options::OPT_finstrument_functions)) + if (args.hasArg(clang::options::OPT_finstrument_functions)) opts.InstrumentFunctions = 1; - if (const llvm::opt::Arg *a = args.getLastArg( - clang::driver::options::OPT_mcode_object_version_EQ)) { + if (const llvm::opt::Arg *a = + args.getLastArg(clang::options::OPT_mcode_object_version_EQ)) { llvm::StringRef s = a->getValue(); if (s == "6") opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_6; @@ -353,36 +350,36 @@ static void 
parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, // -f[no-]save-optimization-record[=] if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_opt_record_file)) + args.getLastArg(clang::options::OPT_opt_record_file)) opts.OptRecordFile = a->getValue(); // Optimization file format. Defaults to yaml if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_opt_record_format)) + args.getLastArg(clang::options::OPT_opt_record_format)) opts.OptRecordFormat = a->getValue(); // Specifies, using a regex, which successful optimization passes(middle and // backend), to include in the final optimization record file generated. If // not provided -fsave-optimization-record will include all passes. if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_opt_record_passes)) + args.getLastArg(clang::options::OPT_opt_record_passes)) opts.OptRecordPasses = a->getValue(); // Create OptRemark that allows printing of all successful optimization // passes applied. opts.OptimizationRemark = - parseOptimizationRemark(diags, args, clang::driver::options::OPT_Rpass_EQ, + parseOptimizationRemark(diags, args, clang::options::OPT_Rpass_EQ, /*remarkOptName=*/"pass"); // Create OptRemark that allows all missed optimization passes to be printed. - opts.OptimizationRemarkMissed = parseOptimizationRemark( - diags, args, clang::driver::options::OPT_Rpass_missed_EQ, - /*remarkOptName=*/"pass-missed"); + opts.OptimizationRemarkMissed = + parseOptimizationRemark(diags, args, clang::options::OPT_Rpass_missed_EQ, + /*remarkOptName=*/"pass-missed"); // Create OptRemark that allows all optimization decisions made by LLVM // to be printed. 
opts.OptimizationRemarkAnalysis = parseOptimizationRemark( - diags, args, clang::driver::options::OPT_Rpass_analysis_EQ, + diags, args, clang::options::OPT_Rpass_analysis_EQ, /*remarkOptName=*/"pass-analysis"); if (opts.getDebugInfo() == llvm::codegenoptions::NoDebugInfo) { @@ -400,23 +397,22 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, opts.setDebugInfo(llvm::codegenoptions::LocTrackingOnly); } - if (auto *a = args.getLastArg(clang::driver::options::OPT_save_temps_EQ)) + if (auto *a = args.getLastArg(clang::options::OPT_save_temps_EQ)) opts.SaveTempsDir = a->getValue(); // -record-command-line option. if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_record_command_line)) { + args.getLastArg(clang::options::OPT_record_command_line)) { opts.RecordCommandLine = a->getValue(); } // -mlink-builtin-bitcode - for (auto *a : - args.filtered(clang::driver::options::OPT_mlink_builtin_bitcode)) + for (auto *a : args.filtered(clang::options::OPT_mlink_builtin_bitcode)) opts.BuiltinBCLibs.push_back(a->getValue()); // -mrelocation-model option. if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_mrelocation_model)) { + args.getLastArg(clang::options::OPT_mrelocation_model)) { llvm::StringRef modelName = a->getValue(); auto relocModel = llvm::StringSwitch>(modelName) @@ -435,31 +431,30 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, } // -pic-level and -pic-is-pie option. 
- if (int picLevel = getLastArgIntValue( - args, clang::driver::options::OPT_pic_level, 0, diags)) { + if (int picLevel = + getLastArgIntValue(args, clang::options::OPT_pic_level, 0, diags)) { if (picLevel > 2) diags.Report(clang::diag::err_drv_invalid_value) - << args.getLastArg(clang::driver::options::OPT_pic_level) - ->getAsString(args) + << args.getLastArg(clang::options::OPT_pic_level)->getAsString(args) << picLevel; opts.PICLevel = picLevel; - if (args.hasArg(clang::driver::options::OPT_pic_is_pie)) + if (args.hasArg(clang::options::OPT_pic_is_pie)) opts.IsPIE = 1; } - if (args.hasArg(clang::driver::options::OPT_fprofile_generate)) { + if (args.hasArg(clang::options::OPT_fprofile_generate)) { opts.setProfileInstr(llvm::driver::ProfileInstrKind::ProfileIRInstr); } - if (auto A = args.getLastArg(clang::driver::options::OPT_fprofile_use_EQ)) { + if (auto A = args.getLastArg(clang::options::OPT_fprofile_use_EQ)) { opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileIRInstr); opts.ProfileInstrumentUsePath = A->getValue(); } // -mcmodel option. if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_mcmodel_EQ)) { + args.getLastArg(clang::options::OPT_mcmodel_EQ)) { llvm::StringRef modelName = a->getValue(); std::optional codeModel = getCodeModel(modelName); @@ -470,8 +465,8 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, << a->getAsString(args) << modelName; } - if (const llvm::opt::Arg *arg = args.getLastArg( - clang::driver::options::OPT_mlarge_data_threshold_EQ)) { + if (const llvm::opt::Arg *arg = + args.getLastArg(clang::options::OPT_mlarge_data_threshold_EQ)) { uint64_t LDT; if (llvm::StringRef(arg->getValue()).getAsInteger(/*Radix=*/10, LDT)) { diags.Report(clang::diag::err_drv_invalid_value) @@ -481,25 +476,24 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, } // This option is compatible with -f[no-]underscoring in gfortran. 
- if (args.hasFlag(clang::driver::options::OPT_fno_underscoring, - clang::driver::options::OPT_funderscoring, false)) { + if (args.hasFlag(clang::options::OPT_fno_underscoring, + clang::options::OPT_funderscoring, false)) { opts.Underscoring = 0; } - if (args.hasFlag(clang::driver::options::OPT_foffload_global_filtering, - clang::driver::options::OPT_fno_offload_global_filtering, - false)) { + if (args.hasFlag(clang::options::OPT_foffload_global_filtering, + clang::options::OPT_fno_offload_global_filtering, false)) { opts.OffloadGlobalFiltering = 1; } parseDoConcurrentMapping(opts, args, diags); opts.DeferDescriptorMapping = - args.hasFlag(clang::driver::options::OPT_fdefer_desc_map, - clang::driver::options::OPT_fno_defer_desc_map, true); + args.hasFlag(clang::options::OPT_fdefer_desc_map, + clang::options::OPT_fno_defer_desc_map, true); if (const llvm::opt::Arg *arg = - args.getLastArg(clang::driver::options::OPT_complex_range_EQ)) { + args.getLastArg(clang::options::OPT_complex_range_EQ)) { llvm::StringRef argValue = llvm::StringRef(arg->getValue()); if (argValue == "full") { opts.setComplexRange(CodeGenOptions::ComplexRangeKind::CX_Full); @@ -520,46 +514,42 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, /// \param [in] opts The target options instance to update /// \param [in] args The list of input arguments (from the compiler invocation) static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) { - if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_triple)) + if (const llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_triple)) opts.triple = a->getValue(); - opts.atomicIgnoreDenormalMode = args.hasFlag( - clang::driver::options::OPT_fatomic_ignore_denormal_mode, - clang::driver::options::OPT_fno_atomic_ignore_denormal_mode, false); - opts.atomicFineGrainedMemory = args.hasFlag( - clang::driver::options::OPT_fatomic_fine_grained_memory, - 
clang::driver::options::OPT_fno_atomic_fine_grained_memory, false); + opts.atomicIgnoreDenormalMode = + args.hasFlag(clang::options::OPT_fatomic_ignore_denormal_mode, + clang::options::OPT_fno_atomic_ignore_denormal_mode, false); + opts.atomicFineGrainedMemory = + args.hasFlag(clang::options::OPT_fatomic_fine_grained_memory, + clang::options::OPT_fno_atomic_fine_grained_memory, false); opts.atomicRemoteMemory = - args.hasFlag(clang::driver::options::OPT_fatomic_remote_memory, - clang::driver::options::OPT_fno_atomic_remote_memory, false); + args.hasFlag(clang::options::OPT_fatomic_remote_memory, + clang::options::OPT_fno_atomic_remote_memory, false); - if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_target_cpu)) + if (const llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_target_cpu)) opts.cpu = a->getValue(); - if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_tune_cpu)) + if (const llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_tune_cpu)) opts.cpuToTuneFor = a->getValue(); for (const llvm::opt::Arg *currentArg : - args.filtered(clang::driver::options::OPT_target_feature)) + args.filtered(clang::options::OPT_target_feature)) opts.featuresAsWritten.emplace_back(currentArg->getValue()); - if (args.hasArg(clang::driver::options::OPT_fdisable_real_10)) + if (args.hasArg(clang::options::OPT_fdisable_real_10)) opts.disabledRealKinds.push_back(10); - if (args.hasArg(clang::driver::options::OPT_fdisable_real_3)) + if (args.hasArg(clang::options::OPT_fdisable_real_3)) opts.disabledRealKinds.push_back(3); - if (args.hasArg(clang::driver::options::OPT_fdisable_integer_2)) + if (args.hasArg(clang::options::OPT_fdisable_integer_2)) opts.disabledIntegerKinds.push_back(2); - if (args.hasArg(clang::driver::options::OPT_fdisable_integer_16)) + if (args.hasArg(clang::options::OPT_fdisable_integer_16)) opts.disabledIntegerKinds.push_back(16); - if (const llvm::opt::Arg *a = - 
args.getLastArg(clang::driver::options::OPT_mabi_EQ)) { + if (const llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_mabi_EQ)) { opts.abi = a->getValue(); llvm::StringRef V = a->getValue(); if (V == "vec-extabi") { @@ -569,9 +559,8 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) { } } - opts.asmVerbose = - args.hasFlag(clang::driver::options::OPT_fverbose_asm, - clang::driver::options::OPT_fno_verbose_asm, false); + opts.asmVerbose = args.hasFlag(clang::options::OPT_fverbose_asm, + clang::options::OPT_fno_verbose_asm, false); } // Tweak the frontend configuration based on the frontend action static void setUpFrontendBasedOnAction(FrontendOptions &opts) { @@ -604,11 +593,11 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, // Treat multiple action options as an invocation error. Note that `clang // -cc1` does accept multiple action options, but will only consider the // rightmost one. - if (args.hasMultipleArgs(clang::driver::options::OPT_Action_Group)) { + if (args.hasMultipleArgs(clang::options::OPT_Action_Group)) { llvm::SmallString<32> buf; llvm::raw_svector_ostream os(buf); for (const llvm::opt::Arg *arg : - args.filtered(clang::driver::options::OPT_Action_Group)) { + args.filtered(clang::options::OPT_Action_Group)) { if (buf.size()) os << ", "; os << "'" << arg->getSpelling() << "'"; @@ -619,99 +608,99 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, // Identify the action (i.e. 
opts.ProgramAction) if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_Action_Group)) { + args.getLastArg(clang::options::OPT_Action_Group)) { switch (a->getOption().getID()) { default: { llvm_unreachable("Invalid option in group!"); } - case clang::driver::options::OPT_test_io: + case clang::options::OPT_test_io: opts.programAction = InputOutputTest; break; - case clang::driver::options::OPT_E: + case clang::options::OPT_E: opts.programAction = PrintPreprocessedInput; break; - case clang::driver::options::OPT_fsyntax_only: + case clang::options::OPT_fsyntax_only: opts.programAction = ParseSyntaxOnly; break; - case clang::driver::options::OPT_emit_fir: + case clang::options::OPT_emit_fir: opts.programAction = EmitFIR; break; - case clang::driver::options::OPT_emit_hlfir: + case clang::options::OPT_emit_hlfir: opts.programAction = EmitHLFIR; break; - case clang::driver::options::OPT_emit_llvm: + case clang::options::OPT_emit_llvm: opts.programAction = EmitLLVM; break; - case clang::driver::options::OPT_emit_llvm_bc: + case clang::options::OPT_emit_llvm_bc: opts.programAction = EmitLLVMBitcode; break; - case clang::driver::options::OPT_emit_obj: + case clang::options::OPT_emit_obj: opts.programAction = EmitObj; break; - case clang::driver::options::OPT_S: + case clang::options::OPT_S: opts.programAction = EmitAssembly; break; - case clang::driver::options::OPT_fdebug_unparse: + case clang::options::OPT_fdebug_unparse: opts.programAction = DebugUnparse; break; - case clang::driver::options::OPT_fdebug_unparse_no_sema: + case clang::options::OPT_fdebug_unparse_no_sema: opts.programAction = DebugUnparseNoSema; break; - case clang::driver::options::OPT_fdebug_unparse_with_symbols: + case clang::options::OPT_fdebug_unparse_with_symbols: opts.programAction = DebugUnparseWithSymbols; break; - case clang::driver::options::OPT_fdebug_unparse_with_modules: + case clang::options::OPT_fdebug_unparse_with_modules: opts.programAction = 
DebugUnparseWithModules; break; - case clang::driver::options::OPT_fdebug_dump_symbols: + case clang::options::OPT_fdebug_dump_symbols: opts.programAction = DebugDumpSymbols; break; - case clang::driver::options::OPT_fdebug_dump_parse_tree: + case clang::options::OPT_fdebug_dump_parse_tree: opts.programAction = DebugDumpParseTree; break; - case clang::driver::options::OPT_fdebug_dump_pft: + case clang::options::OPT_fdebug_dump_pft: opts.programAction = DebugDumpPFT; break; - case clang::driver::options::OPT_fdebug_dump_all: + case clang::options::OPT_fdebug_dump_all: opts.programAction = DebugDumpAll; break; - case clang::driver::options::OPT_fdebug_dump_parse_tree_no_sema: + case clang::options::OPT_fdebug_dump_parse_tree_no_sema: opts.programAction = DebugDumpParseTreeNoSema; break; - case clang::driver::options::OPT_fdebug_dump_provenance: + case clang::options::OPT_fdebug_dump_provenance: opts.programAction = DebugDumpProvenance; break; - case clang::driver::options::OPT_fdebug_dump_parsing_log: + case clang::options::OPT_fdebug_dump_parsing_log: opts.programAction = DebugDumpParsingLog; break; - case clang::driver::options::OPT_fdebug_measure_parse_tree: + case clang::options::OPT_fdebug_measure_parse_tree: opts.programAction = DebugMeasureParseTree; break; - case clang::driver::options::OPT_fdebug_pre_fir_tree: + case clang::options::OPT_fdebug_pre_fir_tree: opts.programAction = DebugPreFIRTree; break; - case clang::driver::options::OPT_fget_symbols_sources: + case clang::options::OPT_fget_symbols_sources: opts.programAction = GetSymbolsSources; break; - case clang::driver::options::OPT_fget_definition: + case clang::options::OPT_fget_definition: opts.programAction = GetDefinition; break; - case clang::driver::options::OPT_init_only: + case clang::options::OPT_init_only: opts.programAction = InitOnly; break; // TODO: - // case clang::driver::options::OPT_emit_llvm: - // case clang::driver::options::OPT_emit_llvm_only: - // case 
clang::driver::options::OPT_emit_codegen_only: - // case clang::driver::options::OPT_emit_module: + // case clang::options::OPT_emit_llvm: + // case clang::options::OPT_emit_llvm_only: + // case clang::options::OPT_emit_codegen_only: + // case clang::options::OPT_emit_module: // (...) } // Parse the values provided with `-fget-definition` (there should be 3 // integers) if (llvm::opt::OptSpecifier(a->getOption().getID()) == - clang::driver::options::OPT_fget_definition) { + clang::options::OPT_fget_definition) { unsigned optVals[3] = {0, 0, 0}; for (unsigned i = 0; i < 3; i++) { @@ -731,27 +720,25 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } // Parsing -load option and storing shared object path - if (llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_load)) { + if (llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_load)) { opts.plugins.push_back(a->getValue()); } // Parsing -plugin option and storing plugin name and setting action - if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_plugin)) { + if (const llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_plugin)) { opts.programAction = PluginAction; opts.actionName = a->getValue(); } - opts.outputFile = args.getLastArgValue(clang::driver::options::OPT_o); - opts.showHelp = args.hasArg(clang::driver::options::OPT_help); - opts.showVersion = args.hasArg(clang::driver::options::OPT_version); + opts.outputFile = args.getLastArgValue(clang::options::OPT_o); + opts.showHelp = args.hasArg(clang::options::OPT_help); + opts.showVersion = args.hasArg(clang::options::OPT_version); opts.printSupportedCPUs = - args.hasArg(clang::driver::options::OPT_print_supported_cpus); + args.hasArg(clang::options::OPT_print_supported_cpus); // Get the input kind (from the value passed via `-x`) InputKind dashX(Language::Unknown); - if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_x)) { + if (const llvm::opt::Arg *a = 
args.getLastArg(clang::options::OPT_x)) { llvm::StringRef xValue = a->getValue(); // Principal languages. dashX = llvm::StringSwitch(xValue) @@ -778,7 +765,7 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, // Collect the input files and save them in our instance of FrontendOptions. std::vector inputs = - args.getAllArgValues(clang::driver::options::OPT_INPUT); + args.getAllArgValues(clang::options::OPT_INPUT); opts.inputs.clear(); if (inputs.empty()) // '-' is the default input if none is given. @@ -798,18 +785,16 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } // Set fortranForm based on options -ffree-form and -ffixed-form. - if (const auto *arg = - args.getLastArg(clang::driver::options::OPT_ffixed_form, - clang::driver::options::OPT_ffree_form)) { - opts.fortranForm = - arg->getOption().matches(clang::driver::options::OPT_ffixed_form) - ? FortranForm::FixedForm - : FortranForm::FreeForm; + if (const auto *arg = args.getLastArg(clang::options::OPT_ffixed_form, + clang::options::OPT_ffree_form)) { + opts.fortranForm = arg->getOption().matches(clang::options::OPT_ffixed_form) + ? 
FortranForm::FixedForm + : FortranForm::FreeForm; } // Set fixedFormColumns based on -ffixed-line-length= if (const auto *arg = - args.getLastArg(clang::driver::options::OPT_ffixed_line_length_EQ)) { + args.getLastArg(clang::options::OPT_ffixed_line_length_EQ)) { llvm::StringRef argValue = llvm::StringRef(arg->getValue()); std::int64_t columns = -1; if (argValue == "none") { @@ -831,8 +816,7 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } // Set conversion based on -fconvert= - if (const auto *arg = - args.getLastArg(clang::driver::options::OPT_fconvert_EQ)) { + if (const auto *arg = args.getLastArg(clang::options::OPT_fconvert_EQ)) { const char *argValue = arg->getValue(); if (auto convert = parseConvertArg(argValue)) opts.envDefaults.push_back({"FORT_CONVERT", *convert}); @@ -842,65 +826,61 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } // -f{no-}implicit-none - opts.features.Enable( - Fortran::common::LanguageFeature::ImplicitNoneTypeAlways, - args.hasFlag(clang::driver::options::OPT_fimplicit_none, - clang::driver::options::OPT_fno_implicit_none, false)); + opts.features.Enable(Fortran::common::LanguageFeature::ImplicitNoneTypeAlways, + args.hasFlag(clang::options::OPT_fimplicit_none, + clang::options::OPT_fno_implicit_none, + false)); // -f{no-}implicit-none-ext - opts.features.Enable( - Fortran::common::LanguageFeature::ImplicitNoneExternal, - args.hasFlag(clang::driver::options::OPT_fimplicit_none_ext, - clang::driver::options::OPT_fno_implicit_none_ext, false)); + opts.features.Enable(Fortran::common::LanguageFeature::ImplicitNoneExternal, + args.hasFlag(clang::options::OPT_fimplicit_none_ext, + clang::options::OPT_fno_implicit_none_ext, + false)); // -f{no-}backslash opts.features.Enable(Fortran::common::LanguageFeature::BackslashEscapes, - args.hasFlag(clang::driver::options::OPT_fbackslash, - clang::driver::options::OPT_fno_backslash, - false)); + 
args.hasFlag(clang::options::OPT_fbackslash, + clang::options::OPT_fno_backslash, false)); // -f{no-}logical-abbreviations opts.features.Enable( Fortran::common::LanguageFeature::LogicalAbbreviations, - args.hasFlag(clang::driver::options::OPT_flogical_abbreviations, - clang::driver::options::OPT_fno_logical_abbreviations, - false)); + args.hasFlag(clang::options::OPT_flogical_abbreviations, + clang::options::OPT_fno_logical_abbreviations, false)); // -f{no-}unsigned opts.features.Enable(Fortran::common::LanguageFeature::Unsigned, - args.hasFlag(clang::driver::options::OPT_funsigned, - clang::driver::options::OPT_fno_unsigned, - false)); + args.hasFlag(clang::options::OPT_funsigned, + clang::options::OPT_fno_unsigned, false)); // -f{no-}xor-operator - opts.features.Enable( - Fortran::common::LanguageFeature::XOROperator, - args.hasFlag(clang::driver::options::OPT_fxor_operator, - clang::driver::options::OPT_fno_xor_operator, false)); + opts.features.Enable(Fortran::common::LanguageFeature::XOROperator, + args.hasFlag(clang::options::OPT_fxor_operator, + clang::options::OPT_fno_xor_operator, + false)); // -fno-automatic - if (args.hasArg(clang::driver::options::OPT_fno_automatic)) { + if (args.hasArg(clang::options::OPT_fno_automatic)) { opts.features.Enable(Fortran::common::LanguageFeature::DefaultSave); } // -f{no}-save-main-program - opts.features.Enable( - Fortran::common::LanguageFeature::SaveMainProgram, - args.hasFlag(clang::driver::options::OPT_fsave_main_program, - clang::driver::options::OPT_fno_save_main_program, false)); + opts.features.Enable(Fortran::common::LanguageFeature::SaveMainProgram, + args.hasFlag(clang::options::OPT_fsave_main_program, + clang::options::OPT_fno_save_main_program, + false)); // -ffast-amd-memory-allocator - if (args.hasArg(clang::driver::options::OPT_ffast_amd_memory_allocator)) { + if (args.hasArg(clang::options::OPT_ffast_amd_memory_allocator)) { opts.features.Enable( (Fortran::common::LanguageFeature::AmdMemoryAllocator)); 
} - if (args.hasArg( - clang::driver::options::OPT_falternative_parameter_statement)) { + if (args.hasArg(clang::options::OPT_falternative_parameter_statement)) { opts.features.Enable(Fortran::common::LanguageFeature::OldStyleParameter); } if (const llvm::opt::Arg *arg = - args.getLastArg(clang::driver::options::OPT_finput_charset_EQ)) { + args.getLastArg(clang::options::OPT_finput_charset_EQ)) { llvm::StringRef argValue = arg->getValue(); if (argValue == "utf-8") { opts.encoding = Fortran::parser::Encoding::UTF_8; @@ -945,9 +925,9 @@ static std::string getOpenMPHeadersDir(const char *argv) { static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts, llvm::opt::ArgList &args) { // Add macros from the command line. - for (const auto *currentArg : args.filtered(clang::driver::options::OPT_D, - clang::driver::options::OPT_U)) { - if (currentArg->getOption().matches(clang::driver::options::OPT_D)) { + for (const auto *currentArg : + args.filtered(clang::options::OPT_D, clang::options::OPT_U)) { + if (currentArg->getOption().matches(clang::options::OPT_D)) { opts.addMacroDef(currentArg->getValue()); } else { opts.addMacroUndef(currentArg->getValue()); @@ -955,34 +935,33 @@ static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts, } // Add the ordered list of -I's. - for (const auto *currentArg : args.filtered(clang::driver::options::OPT_I)) + for (const auto *currentArg : args.filtered(clang::options::OPT_I)) opts.searchDirectoriesFromDashI.emplace_back(currentArg->getValue()); // Prepend the ordered list of -intrinsic-modules-path // to the default location to search. 
for (const auto *currentArg : - args.filtered(clang::driver::options::OPT_fintrinsic_modules_path)) + args.filtered(clang::options::OPT_fintrinsic_modules_path)) opts.searchDirectoriesFromIntrModPath.emplace_back(currentArg->getValue()); // -cpp/-nocpp - if (const auto *currentArg = args.getLastArg( - clang::driver::options::OPT_cpp, clang::driver::options::OPT_nocpp)) - opts.macrosFlag = - (currentArg->getOption().matches(clang::driver::options::OPT_cpp)) - ? PPMacrosFlag::Include - : PPMacrosFlag::Exclude; + if (const auto *currentArg = + args.getLastArg(clang::options::OPT_cpp, clang::options::OPT_nocpp)) + opts.macrosFlag = (currentArg->getOption().matches(clang::options::OPT_cpp)) + ? PPMacrosFlag::Include + : PPMacrosFlag::Exclude; // Enable -cpp based on -x unless explicitly disabled with -nocpp if (opts.macrosFlag != PPMacrosFlag::Exclude) - if (const auto *dashX = args.getLastArg(clang::driver::options::OPT_x)) + if (const auto *dashX = args.getLastArg(clang::options::OPT_x)) opts.macrosFlag = llvm::StringSwitch(dashX->getValue()) .Case("f95-cpp-input", PPMacrosFlag::Include) .Default(opts.macrosFlag); - opts.noReformat = args.hasArg(clang::driver::options::OPT_fno_reformat); + opts.noReformat = args.hasArg(clang::options::OPT_fno_reformat); opts.preprocessIncludeLines = - args.hasArg(clang::driver::options::OPT_fpreprocess_include_lines); - opts.noLineDirectives = args.hasArg(clang::driver::options::OPT_P); - opts.showMacros = args.hasArg(clang::driver::options::OPT_dM); + args.hasArg(clang::options::OPT_fpreprocess_include_lines); + opts.noLineDirectives = args.hasArg(clang::options::OPT_P); + opts.showMacros = args.hasArg(clang::options::OPT_dM); } /// Parses all semantic related arguments and populates the variables @@ -993,7 +972,7 @@ static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, // -J/module-dir option std::vector moduleDirList = - args.getAllArgValues(clang::driver::options::OPT_module_dir); + 
args.getAllArgValues(clang::options::OPT_module_dir); // User can only specify one -J/-module-dir directory, but may repeat // -J/-module-dir as long as the directory is the same each time. // https://gcc.gnu.org/onlinedocs/gfortran/Directory-Options.html @@ -1012,25 +991,25 @@ static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, res.setModuleDir(moduleDirList[0]); // -fdebug-module-writer option - if (args.hasArg(clang::driver::options::OPT_fdebug_module_writer)) { + if (args.hasArg(clang::options::OPT_fdebug_module_writer)) { res.setDebugModuleDir(true); } // -fhermetic-module-files option - if (args.hasArg(clang::driver::options::OPT_fhermetic_module_files)) { + if (args.hasArg(clang::options::OPT_fhermetic_module_files)) { res.setHermeticModuleFileOutput(true); } // -module-suffix if (const auto *moduleSuffix = - args.getLastArg(clang::driver::options::OPT_module_suffix)) { + args.getLastArg(clang::options::OPT_module_suffix)) { res.setModuleFileSuffix(moduleSuffix->getValue()); } // -f{no-}analyzed-objects-for-unparse - res.setUseAnalyzedObjectsForUnparse(args.hasFlag( - clang::driver::options::OPT_fanalyzed_objects_for_unparse, - clang::driver::options::OPT_fno_analyzed_objects_for_unparse, true)); + res.setUseAnalyzedObjectsForUnparse( + args.hasFlag(clang::options::OPT_fanalyzed_objects_for_unparse, + clang::options::OPT_fno_analyzed_objects_for_unparse, true)); return diags.getNumErrors() == numErrorsBefore; } @@ -1047,7 +1026,7 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, // chosen to match clang's behavior. 
// -pedantic - if (args.hasArg(clang::driver::options::OPT_pedantic)) { + if (args.hasArg(clang::options::OPT_pedantic)) { features.WarnOnAllNonstandard(); features.WarnOnAllUsage(); res.setEnableConformanceChecks(); @@ -1057,9 +1036,8 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, // -Werror option // TODO: Currently throws a Diagnostic for anything other than -W, // this has to change when other -W's are supported. - if (args.hasArg(clang::driver::options::OPT_W_Joined)) { - const auto &wArgs = - args.getAllArgValues(clang::driver::options::OPT_W_Joined); + if (args.hasArg(clang::options::OPT_W_Joined)) { + const auto &wArgs = args.getAllArgValues(clang::options::OPT_W_Joined); for (const auto &wArg : wArgs) { if (wArg == "error") { res.setWarnAsErr(true); @@ -1076,7 +1054,7 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } // -w - if (args.hasArg(clang::driver::options::OPT_w)) { + if (args.hasArg(clang::options::OPT_w)) { features.DisableAllWarnings(); res.setDisableWarnings(); } @@ -1096,7 +1074,7 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, unsigned numErrorsBefore = diags.getNumErrors(); // -fd-lines-as-code - if (args.hasArg(clang::driver::options::OPT_fd_lines_as_code)) { + if (args.hasArg(clang::options::OPT_fd_lines_as_code)) { if (res.getFrontendOpts().fortranForm == FortranForm::FreeForm) { const unsigned fdLinesAsWarning = diags.getCustomDiagID( clang::DiagnosticsEngine::Warning, @@ -1109,7 +1087,7 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } // -fd-lines-as-comments - if (args.hasArg(clang::driver::options::OPT_fd_lines_as_comments)) { + if (args.hasArg(clang::options::OPT_fd_lines_as_comments)) { if (res.getFrontendOpts().fortranForm == FortranForm::FreeForm) { const unsigned fdLinesAsWarning = diags.getCustomDiagID( clang::DiagnosticsEngine::Warning, @@ -1122,18 +1100,18 @@ static bool 
parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } // -fdefault* family - if (args.hasArg(clang::driver::options::OPT_fdefault_real_8)) { + if (args.hasArg(clang::options::OPT_fdefault_real_8)) { res.getDefaultKinds().set_defaultRealKind(8); res.getDefaultKinds().set_doublePrecisionKind(16); } - if (args.hasArg(clang::driver::options::OPT_fdefault_integer_8)) { + if (args.hasArg(clang::options::OPT_fdefault_integer_8)) { res.getDefaultKinds().set_defaultIntegerKind(8); res.getDefaultKinds().set_subscriptIntegerKind(8); res.getDefaultKinds().set_sizeIntegerKind(8); res.getDefaultKinds().set_defaultLogicalKind(8); } - if (args.hasArg(clang::driver::options::OPT_fdefault_double_8)) { - if (!args.hasArg(clang::driver::options::OPT_fdefault_real_8)) { + if (args.hasArg(clang::options::OPT_fdefault_double_8)) { + if (!args.hasArg(clang::options::OPT_fdefault_real_8)) { // -fdefault-double-8 has to be used with -fdefault-real-8 // to be compatible with gfortran const unsigned diagID = diags.getCustomDiagID( @@ -1144,18 +1122,18 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, // https://gcc.gnu.org/onlinedocs/gfortran/Fortran-Dialect-Options.html res.getDefaultKinds().set_doublePrecisionKind(8); } - if (args.hasArg(clang::driver::options::OPT_flarge_sizes)) + if (args.hasArg(clang::options::OPT_flarge_sizes)) res.getDefaultKinds().set_sizeIntegerKind(8); // -x cuda - auto language = args.getLastArgValue(clang::driver::options::OPT_x); + auto language = args.getLastArgValue(clang::options::OPT_x); if (language == "cuda") { res.getFrontendOpts().features.Enable( Fortran::common::LanguageFeature::CUDA); } // -fopenacc - if (args.hasArg(clang::driver::options::OPT_fopenacc)) { + if (args.hasArg(clang::options::OPT_fopenacc)) { res.getFrontendOpts().features.Enable( Fortran::common::LanguageFeature::OpenACC); } @@ -1163,8 +1141,8 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, // -std=f2018 
// TODO: Set proper options when more fortran standards // are supported. - if (args.hasArg(clang::driver::options::OPT_std_EQ)) { - auto standard = args.getLastArgValue(clang::driver::options::OPT_std_EQ); + if (args.hasArg(clang::options::OPT_std_EQ)) { + auto standard = args.getLastArgValue(clang::options::OPT_std_EQ); // We only allow f2018 as the given standard if (standard == "f2018") { res.setEnableConformanceChecks(); @@ -1177,7 +1155,7 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } } // -fcoarray - if (args.hasArg(clang::driver::options::OPT_fcoarray)) { + if (args.hasArg(clang::options::OPT_fcoarray)) { res.getFrontendOpts().features.Enable( Fortran::common::LanguageFeature::Coarray); const unsigned diagID = @@ -1195,13 +1173,12 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, /// generated. static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { - llvm::opt::Arg *arg = args.getLastArg(clang::driver::options::OPT_fopenmp, - clang::driver::options::OPT_fno_openmp); - if (!arg || - arg->getOption().matches(clang::driver::options::OPT_fno_openmp)) { - bool isSimdSpecified = args.hasFlag( - clang::driver::options::OPT_fopenmp_simd, - clang::driver::options::OPT_fno_openmp_simd, /*Default=*/false); + llvm::opt::Arg *arg = args.getLastArg(clang::options::OPT_fopenmp, + clang::options::OPT_fno_openmp); + if (!arg || arg->getOption().matches(clang::options::OPT_fno_openmp)) { + bool isSimdSpecified = + args.hasFlag(clang::options::OPT_fopenmp_simd, + clang::options::OPT_fno_openmp_simd, /*Default=*/false); if (!isSimdSpecified) return true; res.getLangOpts().OpenMPSimd = 1; @@ -1217,8 +1194,7 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, res.getLangOpts().OpenMPVersion = newestFullySupported; res.getFrontendOpts().features.Enable( Fortran::common::LanguageFeature::OpenMP); - if (auto *arg = - 
args.getLastArg(clang::driver::options::OPT_fopenmp_version_EQ)) { + if (auto *arg = args.getLastArg(clang::options::OPT_fopenmp_version_EQ)) { llvm::ArrayRef ompVersions = llvm::omp::getOpenMPVersions(); unsigned oldVersions[] = {11, 20, 25, 30}; unsigned version = 0; @@ -1273,16 +1249,16 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } } - if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) { + if (args.hasArg(clang::options::OPT_fopenmp_force_usm)) { res.getLangOpts().OpenMPForceUSM = 1; } - if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) { + if (args.hasArg(clang::options::OPT_fopenmp_is_target_device)) { res.getLangOpts().OpenMPIsTargetDevice = 1; // Get OpenMP host file path if any and report if a non existent file is // found - if (auto *arg = args.getLastArg( - clang::driver::options::OPT_fopenmp_host_ir_file_path)) { + if (auto *arg = + args.getLastArg(clang::options::OPT_fopenmp_host_ir_file_path)) { res.getLangOpts().OMPHostIRFile = arg->getValue(); if (!llvm::sys::fs::exists(res.getLangOpts().OMPHostIRFile)) diags.Report(clang::diag::err_omp_host_ir_file_not_found) @@ -1290,37 +1266,34 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } if (args.hasFlag( - clang::driver::options::OPT_fopenmp_assume_teams_oversubscription, - clang::driver::options:: - OPT_fno_openmp_assume_teams_oversubscription, + clang::options::OPT_fopenmp_assume_teams_oversubscription, + clang::options::OPT_fno_openmp_assume_teams_oversubscription, /*Default=*/false)) res.getLangOpts().OpenMPTeamSubscription = true; - if (args.hasArg(clang::driver::options::OPT_fopenmp_assume_no_thread_state)) + if (args.hasArg(clang::options::OPT_fopenmp_assume_no_thread_state)) res.getLangOpts().OpenMPNoThreadState = 1; - if (args.hasArg( - clang::driver::options::OPT_fopenmp_assume_no_nested_parallelism)) + if (args.hasArg(clang::options::OPT_fopenmp_assume_no_nested_parallelism)) 
res.getLangOpts().OpenMPNoNestedParallelism = 1; if (args.hasFlag( - clang::driver::options::OPT_fopenmp_assume_threads_oversubscription, - clang::driver::options:: - OPT_fno_openmp_assume_threads_oversubscription, + clang::options::OPT_fopenmp_assume_threads_oversubscription, + clang::options::OPT_fno_openmp_assume_threads_oversubscription, /*Default=*/false)) res.getLangOpts().OpenMPThreadSubscription = true; - if ((args.hasArg(clang::driver::options::OPT_fopenmp_target_debug) || - args.hasArg(clang::driver::options::OPT_fopenmp_target_debug_EQ))) { - res.getLangOpts().OpenMPTargetDebug = getLastArgIntValue( - args, clang::driver::options::OPT_fopenmp_target_debug_EQ, - res.getLangOpts().OpenMPTargetDebug, diags); + if ((args.hasArg(clang::options::OPT_fopenmp_target_debug) || + args.hasArg(clang::options::OPT_fopenmp_target_debug_EQ))) { + res.getLangOpts().OpenMPTargetDebug = + getLastArgIntValue(args, clang::options::OPT_fopenmp_target_debug_EQ, + res.getLangOpts().OpenMPTargetDebug, diags); if (!res.getLangOpts().OpenMPTargetDebug && - args.hasArg(clang::driver::options::OPT_fopenmp_target_debug)) + args.hasArg(clang::options::OPT_fopenmp_target_debug)) res.getLangOpts().OpenMPTargetDebug = 1; } - if (args.hasArg(clang::driver::options::OPT_no_offloadlib)) + if (args.hasArg(clang::options::OPT_no_offloadlib)) res.getLangOpts().NoGPULib = 1; } if (llvm::Triple(res.getTargetOpts().triple).isGPU()) { @@ -1336,8 +1309,7 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } // Get the OpenMP target triples if any. 
- if (auto *arg = - args.getLastArg(clang::driver::options::OPT_offload_targets_EQ)) { + if (auto *arg = args.getLastArg(clang::options::OPT_offload_targets_EQ)) { enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit }; auto getArchPtrSize = [](const llvm::Triple &triple) { if (triple.isArch16Bit()) @@ -1366,8 +1338,7 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } } - if (args.hasArg( - clang::driver::options::OPT_famd_allow_threadprivate_equivalence)) + if (args.hasArg(clang::options::OPT_famd_allow_threadprivate_equivalence)) res.getLangOpts().AllowThreadprivateEquivalence = true; return diags.getNumErrors() == numErrorsBefore; @@ -1385,7 +1356,7 @@ static bool parseIntegerOverflowArgs(CompilerInvocation &invoc, clang::DiagnosticsEngine &diags) { Fortran::common::LangOptions &opts = invoc.getLangOpts(); - if (args.getLastArg(clang::driver::options::OPT_fwrapv)) + if (args.getLastArg(clang::options::OPT_fwrapv)) opts.setSignedOverflowBehavior(Fortran::common::LangOptions::SOB_Defined); return true; @@ -1404,7 +1375,7 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, Fortran::common::LangOptions &opts = invoc.getLangOpts(); if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_ffp_contract)) { + args.getLastArg(clang::options::OPT_ffp_contract)) { const llvm::StringRef val = a->getValue(); enum Fortran::common::LangOptions::FPModeKind fpContractMode; @@ -1421,31 +1392,31 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.setFPContractMode(fpContractMode); } - if (args.getLastArg(clang::driver::options::OPT_menable_no_infs)) { + if (args.getLastArg(clang::options::OPT_menable_no_infs)) { opts.NoHonorInfs = true; } - if (args.getLastArg(clang::driver::options::OPT_menable_no_nans)) { + if (args.getLastArg(clang::options::OPT_menable_no_nans)) { opts.NoHonorNaNs = true; } - if (args.getLastArg(clang::driver::options::OPT_fapprox_func)) { + if 
(args.getLastArg(clang::options::OPT_fapprox_func)) { opts.ApproxFunc = true; } - if (args.getLastArg(clang::driver::options::OPT_fno_signed_zeros)) { + if (args.getLastArg(clang::options::OPT_fno_signed_zeros)) { opts.NoSignedZeros = true; } - if (args.getLastArg(clang::driver::options::OPT_mreassociate)) { + if (args.getLastArg(clang::options::OPT_mreassociate)) { opts.AssociativeMath = true; } - if (args.getLastArg(clang::driver::options::OPT_freciprocal_math)) { + if (args.getLastArg(clang::options::OPT_freciprocal_math)) { opts.ReciprocalMath = true; } - if (args.getLastArg(clang::driver::options::OPT_ffast_math)) { + if (args.getLastArg(clang::options::OPT_ffast_math)) { opts.NoHonorInfs = true; opts.NoHonorNaNs = true; opts.AssociativeMath = true; @@ -1455,7 +1426,7 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast); } - if (args.hasArg(clang::driver::options::OPT_fno_fast_real_mod)) + if (args.hasArg(clang::options::OPT_fno_fast_real_mod)) opts.NoFastRealMod = true; return true; @@ -1470,10 +1441,8 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, /// \param [out] diags DiagnosticsEngine to report erros with static bool parseVScaleArgs(CompilerInvocation &invoc, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { - const auto *vscaleMin = - args.getLastArg(clang::driver::options::OPT_mvscale_min_EQ); - const auto *vscaleMax = - args.getLastArg(clang::driver::options::OPT_mvscale_max_EQ); + const auto *vscaleMin = args.getLastArg(clang::options::OPT_mvscale_min_EQ); + const auto *vscaleMax = args.getLastArg(clang::options::OPT_mvscale_max_EQ); if (!vscaleMin && !vscaleMax) return true; @@ -1521,8 +1490,7 @@ static bool parseLinkerOptionsArgs(CompilerInvocation &invoc, // TODO: support --dependent-lib on other platforms when MLIR supports // !llvm.dependent.lib - if (args.hasArg(clang::driver::options::OPT_dependent_lib) && - !triple.isOSWindows()) { + if 
(args.hasArg(clang::options::OPT_dependent_lib) && !triple.isOSWindows()) { const unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, "--dependent-lib is only supported on Windows"); @@ -1530,12 +1498,10 @@ static bool parseLinkerOptionsArgs(CompilerInvocation &invoc, return false; } - opts.DependentLibs = - args.getAllArgValues(clang::driver::options::OPT_dependent_lib); + opts.DependentLibs = args.getAllArgValues(clang::options::OPT_dependent_lib); // -flto=full/thin option. - if (const llvm::opt::Arg *a = - args.getLastArg(clang::driver::options::OPT_flto_EQ)) { + if (const llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_flto_EQ)) { llvm::StringRef s = a->getValue(); assert((s == "full" || s == "thin") && "Unknown LTO mode."); if (s == "full") @@ -1546,10 +1512,10 @@ static bool parseLinkerOptionsArgs(CompilerInvocation &invoc, // -ffat-lto-objects if (const llvm::opt::Arg *arg = - args.getLastArg(clang::driver::options::OPT_ffat_lto_objects, - clang::driver::options::OPT_fno_fat_lto_objects)) { + args.getLastArg(clang::options::OPT_ffat_lto_objects, + clang::options::OPT_fno_fat_lto_objects)) { opts.PrepareForFatLTO = - arg->getOption().matches(clang::driver::options::OPT_ffat_lto_objects); + arg->getOption().matches(clang::options::OPT_ffat_lto_objects); if (opts.PrepareForFatLTO) { assert((opts.PrepareForFullLTO || opts.PrepareForThinLTO) && "Unknown LTO mode"); @@ -1590,8 +1556,8 @@ bool CompilerInvocation::createFromArgs( llvm::Triple::normalize(llvm::sys::getDefaultTargetTriple()); // Parse the arguments - const llvm::opt::OptTable &opts = clang::driver::getDriverOptTable(); - llvm::opt::Visibility visibilityMask(clang::driver::options::FC1Option); + const llvm::opt::OptTable &opts = clang::getDriverOptTable(); + llvm::opt::Visibility visibilityMask(clang::options::FC1Option); unsigned missingArgIndex, missingArgCount; llvm::opt::InputArgList args = opts.ParseArgs( commandLineArgs, missingArgIndex, missingArgCount, 
visibilityMask); @@ -1604,7 +1570,7 @@ bool CompilerInvocation::createFromArgs( } // Issue errors on unknown arguments - for (const auto *a : args.filtered(clang::driver::options::OPT_UNKNOWN)) { + for (const auto *a : args.filtered(clang::options::OPT_UNKNOWN)) { auto argString = a->getAsString(args); std::string nearest; if (opts.findNearest(argString, nearest, visibilityMask) > 1) @@ -1616,15 +1582,15 @@ bool CompilerInvocation::createFromArgs( } // -flang-experimental-hlfir - if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || - args.hasArg(clang::driver::options::OPT_emit_hlfir)) { + if (args.hasArg(clang::options::OPT_flang_experimental_hlfir) || + args.hasArg(clang::options::OPT_emit_hlfir)) { invoc.loweringOpts.setLowerToHighLevelFIR(true); } // -flang-deprecated-no-hlfir - if (args.hasArg(clang::driver::options::OPT_flang_deprecated_no_hlfir) && - !args.hasArg(clang::driver::options::OPT_emit_hlfir)) { - if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir)) { + if (args.hasArg(clang::options::OPT_flang_deprecated_no_hlfir) && + !args.hasArg(clang::options::OPT_emit_hlfir)) { + if (args.hasArg(clang::options::OPT_flang_experimental_hlfir)) { const unsigned diagID = diags.getCustomDiagID( clang::DiagnosticsEngine::Error, "Options '-flang-experimental-hlfir' and " @@ -1635,13 +1601,13 @@ bool CompilerInvocation::createFromArgs( } // -fno-ppc-native-vector-element-order - if (args.hasArg(clang::driver::options::OPT_fno_ppc_native_vec_elem_order)) { + if (args.hasArg(clang::options::OPT_fno_ppc_native_vec_elem_order)) { invoc.loweringOpts.setNoPPCNativeVecElemOrder(true); } // -f[no-]init-global-zero - if (args.hasFlag(clang::driver::options::OPT_finit_global_zero, - clang::driver::options::OPT_fno_init_global_zero, + if (args.hasFlag(clang::options::OPT_finit_global_zero, + clang::options::OPT_fno_init_global_zero, /*default=*/true)) invoc.loweringOpts.setInitGlobalZero(true); else @@ -1650,8 +1616,8 @@ bool 
CompilerInvocation::createFromArgs( // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or // -Rpass-analysis. This will be used later when processing and outputting the // remarks generated by LLVM in ExecuteCompilerInvocation.cpp. - for (auto *a : args.filtered(clang::driver::options::OPT_R_Group)) { - if (a->getOption().matches(clang::driver::options::OPT_R_value_Group)) + for (auto *a : args.filtered(clang::options::OPT_R_Group)) { + if (a->getOption().matches(clang::options::OPT_R_value_Group)) // This is -Rfoo=, where foo is the name of the diagnostic // group. Add only the remark option name to the diagnostics. e.g. for // -Rpass= we will add the string "pass". @@ -1664,20 +1630,19 @@ bool CompilerInvocation::createFromArgs( } // -frealloc-lhs is the default. - if (!args.hasFlag(clang::driver::options::OPT_frealloc_lhs, - clang::driver::options::OPT_fno_realloc_lhs, true)) + if (!args.hasFlag(clang::options::OPT_frealloc_lhs, + clang::options::OPT_fno_realloc_lhs, true)) invoc.loweringOpts.setReallocateLHS(false); - invoc.loweringOpts.setRepackArrays( - args.hasFlag(clang::driver::options::OPT_frepack_arrays, - clang::driver::options::OPT_fno_repack_arrays, - /*default=*/false)); + invoc.loweringOpts.setRepackArrays(args.hasFlag( + clang::options::OPT_frepack_arrays, clang::options::OPT_fno_repack_arrays, + /*default=*/false)); invoc.loweringOpts.setStackRepackArrays( - args.hasFlag(clang::driver::options::OPT_fstack_repack_arrays, - clang::driver::options::OPT_fno_stack_repack_arrays, + args.hasFlag(clang::options::OPT_fstack_repack_arrays, + clang::options::OPT_fno_stack_repack_arrays, /*default=*/false)); - if (auto *arg = args.getLastArg( - clang::driver::options::OPT_frepack_arrays_contiguity_EQ)) + if (auto *arg = + args.getLastArg(clang::options::OPT_frepack_arrays_contiguity_EQ)) invoc.loweringOpts.setRepackArraysWhole(arg->getValue() == llvm::StringRef{"whole"}); @@ -1697,10 +1662,8 @@ bool CompilerInvocation::createFromArgs( // 
`mlirArgs`. Instead, you can use // * `-mllvm `, or // * `-mmlir `. - invoc.frontendOpts.llvmArgs = - args.getAllArgValues(clang::driver::options::OPT_mllvm); - invoc.frontendOpts.mlirArgs = - args.getAllArgValues(clang::driver::options::OPT_mmlir); + invoc.frontendOpts.llvmArgs = args.getAllArgValues(clang::options::OPT_mllvm); + invoc.frontendOpts.mlirArgs = args.getAllArgValues(clang::options::OPT_mmlir); success &= parseLangOptionsArgs(invoc, args, diags); @@ -1724,7 +1687,7 @@ bool CompilerInvocation::createFromArgs( } // Process the timing-related options. - if (args.hasArg(clang::driver::options::OPT_ftime_report)) + if (args.hasArg(clang::options::OPT_ftime_report)) invoc.enableTimers = true; invoc.setArgv0(argv0); diff --git a/flang/lib/FrontendTool/CMakeLists.txt b/flang/lib/FrontendTool/CMakeLists.txt index faf56e9d955a1..b69436c36d438 100644 --- a/flang/lib/FrontendTool/CMakeLists.txt +++ b/flang/lib/FrontendTool/CMakeLists.txt @@ -18,5 +18,6 @@ add_flang_library(flangFrontendTool CLANG_LIBS clangBasic + clangOptions clangDriver ) diff --git a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 09ac129d3e689..7586be59ba01b 100644 --- a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -23,7 +23,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/Pass/PassManager.h" #include "clang/Basic/DiagnosticFrontend.h" -#include "clang/Driver/Options.h" +#include "clang/Options/Options.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/Support/BuryPointer.h" @@ -153,10 +153,10 @@ updateDiagEngineForOptRemarks(clang::DiagnosticsEngine &diagsEng, bool executeCompilerInvocation(CompilerInstance *flang) { // Honor -help. 
if (flang->getFrontendOpts().showHelp) { - clang::driver::getDriverOptTable().printHelp( + clang::getDriverOptTable().printHelp( llvm::outs(), "flang -fc1 [options] file...", "LLVM 'Flang' Compiler", /*ShowHidden=*/false, /*ShowAllAliases=*/false, - llvm::opt::Visibility(clang::driver::options::FC1Option)); + llvm::opt::Visibility(clang::options::FC1Option)); return true; } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index ce05ab22c880f..9cdd46137adbf 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -406,6 +406,11 @@ bool ClauseProcessor::processMergeable( return markClauseOccurrence(result.mergeable); } +bool ClauseProcessor::processNogroup( + mlir::omp::NogroupClauseOps &result) const { + return markClauseOccurrence(result.nogroup); +} + bool ClauseProcessor::processNowait(mlir::omp::NowaitClauseOps &result) const { return markClauseOccurrence(result.nowait); } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index fecf3ca4af9dd..00cf9bbe4c48b 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -89,6 +89,7 @@ class ClauseProcessor { bool processInclusive(mlir::Location currentLocation, mlir::omp::InclusiveClauseOps &result) const; bool processMergeable(mlir::omp::MergeableClauseOps &result) const; + bool processNogroup(mlir::omp::NogroupClauseOps &result) const; bool processNowait(mlir::omp::NowaitClauseOps &result) const; bool processNumTasks(lower::StatementContext &stmtCtx, mlir::omp::NumTasksClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 2575b7081039b..002b7c1888e73 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -396,8 +396,6 @@ makePrescriptiveness(parser::OmpPrescriptiveness::Value v) { switch (v) { case 
parser::OmpPrescriptiveness::Value::Strict: return clause::Prescriptiveness::Strict; - case parser::OmpPrescriptiveness::Value::Fallback: - return clause::Prescriptiveness::Fallback; } llvm_unreachable("Unexpected prescriptiveness"); } @@ -799,21 +797,31 @@ DynGroupprivate make(const parser::OmpClause::DynGroupprivate &inp, semantics::SemanticsContext &semaCtx) { // imp.v -> OmpDyngroupprivateClause CLAUSET_ENUM_CONVERT( // - convert, parser::OmpAccessGroup::Value, DynGroupprivate::AccessGroup, + makeAccessGroup, parser::OmpAccessGroup::Value, + DynGroupprivate::AccessGroup, // clang-format off MS(Cgroup, Cgroup) // clang-format on ); + CLAUSET_ENUM_CONVERT( // + makeFallback, parser::OmpFallbackModifier::Value, + DynGroupprivate::Fallback, + // clang-format off + MS(Abort, Abort) + MS(Default_Mem, Default_Mem) + MS(Null, Null) + // clang-format on + ); + auto &mods = semantics::OmpGetModifiers(inp.v); auto *m0 = semantics::OmpGetUniqueModifier(mods); - auto *m1 = semantics::OmpGetUniqueModifier(mods); + auto *m1 = semantics::OmpGetUniqueModifier(mods); auto &size = std::get(inp.v.t); - return DynGroupprivate{ - {/*AccessGroup=*/maybeApplyToV(convert, m0), - /*Prescriptiveness=*/maybeApplyToV(makePrescriptiveness, m1), - /*Size=*/makeExpr(size, semaCtx)}}; + return DynGroupprivate{{/*AccessGroup=*/maybeApplyToV(makeAccessGroup, m0), + /*Fallback=*/maybeApplyToV(makeFallback, m1), + /*Size=*/makeExpr(size, semaCtx)}}; } Enter make(const parser::OmpClause::Enter &inp, diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index b0dec20ca3a1f..472273e6e16e1 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -347,7 +347,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { if (!hasLastPrivate) return; - if (mlir::isa(op) || mlir::isa(op)) { + if (mlir::isa(op) || mlir::isa(op) || + mlir::isa(op)) { 
mlir::omp::LoopRelatedClauseOps result; llvm::SmallVector iv; collectLoopRelatedInfo(converter, converter.getCurrentLocation(), eval, @@ -413,7 +414,7 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { } else { TODO(converter.getCurrentLocation(), "lastprivate clause in constructs other than " - "simd/worksharing-loop"); + "simd/worksharing-loop/taskloop"); } } diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ea1b53fb5185f..d1366443c1647 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1770,25 +1770,27 @@ static void genTaskgroupClauses( cp.processTaskReduction(loc, clauseOps, taskReductionSyms); } -static void genTaskloopClauses(lower::AbstractConverter &converter, - semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, - const List &clauses, mlir::Location loc, - mlir::omp::TaskloopOperands &clauseOps) { +static void genTaskloopClauses( + lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, + lower::StatementContext &stmtCtx, const List &clauses, + mlir::Location loc, mlir::omp::TaskloopOperands &clauseOps, + llvm::SmallVectorImpl &reductionSyms, + llvm::SmallVectorImpl &inReductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processFinal(stmtCtx, clauseOps); cp.processGrainsize(stmtCtx, clauseOps); cp.processIf(llvm::omp::Directive::OMPD_taskloop, clauseOps); + cp.processInReduction(loc, clauseOps, inReductionSyms); cp.processMergeable(clauseOps); + cp.processNogroup(clauseOps); cp.processNumTasks(stmtCtx, clauseOps); cp.processPriority(stmtCtx, clauseOps); + cp.processReduction(loc, clauseOps, reductionSyms); cp.processUntied(clauseOps); - cp.processTODO( - loc, llvm::omp::Directive::OMPD_taskloop); + cp.processTODO(loc, llvm::omp::Directive::OMPD_taskloop); } static void genTaskwaitClauses(lower::AbstractConverter &converter, @@ -2999,8 +3001,11 @@ static 
mlir::omp::TaskloopOp genStandaloneTaskloop( lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TaskloopOperands taskloopClauseOps; + llvm::SmallVector reductionSyms; + llvm::SmallVector inReductionSyms; + genTaskloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, - taskloopClauseOps); + taskloopClauseOps, reductionSyms, inReductionSyms); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, enableDelayedPrivatization, symTable); @@ -3015,6 +3020,10 @@ static mlir::omp::TaskloopOp genStandaloneTaskloop( EntryBlockArgs taskloopArgs; taskloopArgs.priv.syms = dsp.getDelayedPrivSymbols(); taskloopArgs.priv.vars = taskloopClauseOps.privateVars; + taskloopArgs.reduction.syms = reductionSyms; + taskloopArgs.reduction.vars = taskloopClauseOps.reductionVars; + taskloopArgs.inReduction.syms = inReductionSyms; + taskloopArgs.inReduction.vars = taskloopClauseOps.inReductionVars; auto taskLoopOp = genWrapperOp( converter, loc, taskloopClauseOps, taskloopArgs); diff --git a/flang/lib/Optimizer/CodeGen/PassDetail.h b/flang/lib/Optimizer/CodeGen/PassDetail.h index f7030131beff9..252da029dc0c8 100644 --- a/flang/lib/Optimizer/CodeGen/PassDetail.h +++ b/flang/lib/Optimizer/CodeGen/PassDetail.h @@ -18,7 +18,7 @@ namespace fir { -#define GEN_PASS_CLASSES +#define GEN_PASS_DECL #include "flang/Optimizer/CodeGen/CGPasses.h.inc" } // namespace fir diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 4374acbbe51bf..e2da60ed19de8 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -791,6 +791,12 @@ TYPE_PARSER(construct(OmpDirectiveNameParser{})) TYPE_PARSER(construct( // "PRESENT" >> pure(OmpExpectation::Value::Present))) +TYPE_PARSER(construct("FALLBACK"_tok >> + parenthesized( // + "ABORT" >> pure(OmpFallbackModifier::Value::Abort) || + "DEFAULT_MEM" >> 
pure(OmpFallbackModifier::Value::Default_Mem) || + "NULL" >> pure(OmpFallbackModifier::Value::Null)))) + TYPE_PARSER(construct( construct(charLiteralConstant) || construct(scalarIntConstantExpr))) @@ -857,8 +863,7 @@ TYPE_PARSER(construct( "SIMD" >> pure(OmpOrderingModifier::Value::Simd))) TYPE_PARSER(construct( - "STRICT" >> pure(OmpPrescriptiveness::Value::Strict) || - "FALLBACK" >> pure(OmpPrescriptiveness::Value::Fallback))) + "STRICT" >> pure(OmpPrescriptiveness::Value::Strict))) TYPE_PARSER(construct( // "PRESENT" >> pure(OmpPresentModifier::Value::Present))) @@ -925,7 +930,7 @@ TYPE_PARSER( // sourced(construct( Parser{})) || sourced(construct( - Parser{}))) + Parser{}))) TYPE_PARSER( sourced(construct(Parser{}))) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index dc0f083c9fc95..53e74298f96ac 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2286,6 +2286,11 @@ class UnparseVisitor { Walk(std::get(x.t)); Walk(": ", std::get>>(x.t)); } + void Unparse(const OmpFallbackModifier &x) { + Word("FALLBACK("); + Walk(x.v); + Put(")"); + } void Unparse(const OmpDynGroupprivateClause &x) { using Modifier = OmpDynGroupprivateClause::Modifier; Walk(std::get>>(x.t), ": "); @@ -2795,6 +2800,7 @@ class UnparseVisitor { OmpDeviceTypeClause, DeviceTypeDescription) // OMP device_type WALK_NESTED_ENUM(OmpReductionModifier, Value) // OMP reduction-modifier WALK_NESTED_ENUM(OmpExpectation, Value) // OMP motion-expectation + WALK_NESTED_ENUM(OmpFallbackModifier, Value) // OMP fallback-modifier WALK_NESTED_ENUM(OmpInteropType, Value) // OMP InteropType WALK_NESTED_ENUM(OmpOrderClause, Ordering) // OMP ordering WALK_NESTED_ENUM(OmpOrderModifier, Value) // OMP order-modifier diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 4bd4fe79763d8..eaf1faac5e2e2 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -682,6 
+682,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Hint &x) { } } +void OmpStructureChecker::Enter(const parser::OmpClause::DynGroupprivate &x) { + CheckAllowedClause(llvm::omp::Clause::OMPC_dyn_groupprivate); + parser::CharBlock source{GetContext().clauseSource}; + + OmpVerifyModifiers(x.v, llvm::omp::OMPC_dyn_groupprivate, source, context_); +} + void OmpStructureChecker::Enter(const parser::OmpDirectiveSpecification &x) { // OmpDirectiveSpecification exists on its own only in METADIRECTIVE. // In other cases it's a part of other constructs that handle directive @@ -3332,6 +3339,32 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &) { } } + // Default access-group for DYN_GROUPPRIVATE is "cgroup". On a given + // construct there can be at most one DYN_GROUPPRIVATE with a given + // access-group. + const parser::OmpClause + *accGrpClause[parser::OmpAccessGroup::Value_enumSize] = {nullptr}; + for (auto [_, clause] : + FindClauses(llvm::omp::Clause::OMPC_dyn_groupprivate)) { + auto &wrapper{std::get(clause->u)}; + auto &modifiers{OmpGetModifiers(wrapper.v)}; + auto accGrp{parser::OmpAccessGroup::Value::Cgroup}; + if (auto *ag{OmpGetUniqueModifier(modifiers)}) { + accGrp = ag->v; + } + auto &firstClause{accGrpClause[llvm::to_underlying(accGrp)]}; + if (firstClause) { + context_ + .Say(clause->source, + "The access-group modifier can only occur on a single clause in a construct"_err_en_US) + .Attach(firstClause->source, + "Previous clause with access-group modifier"_en_US); + break; + } else { + firstClause = clause; + } + } + CheckRequireAtLeastOneOf(); } @@ -4689,10 +4722,12 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Copyin &x) { void OmpStructureChecker::CheckStructureComponent( const parser::OmpObjectList &objects, llvm::omp::Clause clauseId) { auto CheckComponent{[&](const parser::Designator &designator) { - if (auto *dataRef{std::get_if(&designator.u)}) { + if (const parser::DataRef *dataRef{ + 
std::get_if(&designator.u)}) { if (!IsDataRefTypeParamInquiry(dataRef)) { - if (auto *comp{parser::Unwrap(*dataRef)}) { - context_.Say(comp->component.source, + const auto expr{AnalyzeExpr(context_, designator)}; + if (expr.has_value() && evaluate::HasStructureComponent(expr.value())) { + context_.Say(designator.source, "A variable that is part of another variable cannot appear on the %s clause"_err_en_US, parser::ToUpperCaseLetters(getClauseName(clauseId).str())); } @@ -5489,7 +5524,6 @@ CHECK_SIMPLE_CLAUSE(Default, OMPC_default) CHECK_SIMPLE_CLAUSE(Depobj, OMPC_depobj) CHECK_SIMPLE_CLAUSE(DeviceType, OMPC_device_type) CHECK_SIMPLE_CLAUSE(DistSchedule, OMPC_dist_schedule) -CHECK_SIMPLE_CLAUSE(DynGroupprivate, OMPC_dyn_groupprivate) CHECK_SIMPLE_CLAUSE(Exclusive, OMPC_exclusive) CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail) CHECK_SIMPLE_CLAUSE(Filter, OMPC_filter) diff --git a/flang/lib/Semantics/openmp-modifiers.cpp b/flang/lib/Semantics/openmp-modifiers.cpp index 717fb0351ba5b..f191b4de2d579 100644 --- a/flang/lib/Semantics/openmp-modifiers.cpp +++ b/flang/lib/Semantics/openmp-modifiers.cpp @@ -74,6 +74,22 @@ unsigned OmpModifierDescriptor::since(llvm::omp::Clause id) const { // Note: The intent for these functions is to have them be automatically- // generated in the future. 
+template <> +const OmpModifierDescriptor &OmpGetDescriptor() { + static const OmpModifierDescriptor desc{ + /*name=*/"access-group", + /*props=*/ + { + {61, {OmpProperty::Unique}}, + }, + /*clauses=*/ + { + {61, {Clause::OMPC_dyn_groupprivate}}, + }, + }; + return desc; +} + template <> const OmpModifierDescriptor &OmpGetDescriptor() { static const OmpModifierDescriptor desc{ @@ -321,6 +337,22 @@ const OmpModifierDescriptor &OmpGetDescriptor() { return desc; } +template <> +const OmpModifierDescriptor &OmpGetDescriptor() { + static const OmpModifierDescriptor desc{ + /*name=*/"fallback-modifier", + /*props=*/ + { + {61, {OmpProperty::Unique}}, + }, + /*clauses=*/ + { + {61, {Clause::OMPC_dyn_groupprivate}}, + }, + }; + return desc; +} + template <> const OmpModifierDescriptor &OmpGetDescriptor() { static const OmpModifierDescriptor desc{ diff --git a/flang/test/Lower/OpenMP/Todo/taskloop-inreduction.f90 b/flang/test/Lower/OpenMP/Todo/taskloop-inreduction.f90 deleted file mode 100644 index 8acc399a92abe..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/taskloop-inreduction.f90 +++ /dev/null @@ -1,13 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s - -! CHECK: not yet implemented: Unhandled clause IN_REDUCTION in TASKLOOP construct -subroutine omp_taskloop_inreduction() - integer x - x = 0 - !$omp taskloop in_reduction(+:x) - do i = 1, 100 - x = x + 1 - end do - !$omp end taskloop -end subroutine omp_taskloop_inreduction diff --git a/flang/test/Lower/OpenMP/Todo/taskloop-lastprivate.f90 b/flang/test/Lower/OpenMP/Todo/taskloop-lastprivate.f90 deleted file mode 100644 index 54f2580daf283..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/taskloop-lastprivate.f90 +++ /dev/null @@ -1,13 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! 
RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s - -! CHECK: not yet implemented: Unhandled clause LASTPRIVATE in TASKLOOP construct -subroutine omp_taskloop_lastprivate() - integer x - x = 0 - !$omp taskloop lastprivate(x) - do i = 1, 100 - x = x + 1 - end do - !$omp end taskloop -end subroutine omp_taskloop_lastprivate diff --git a/flang/test/Lower/OpenMP/Todo/taskloop-nogroup.f90 b/flang/test/Lower/OpenMP/Todo/taskloop-nogroup.f90 deleted file mode 100644 index 2a0c5985290e2..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/taskloop-nogroup.f90 +++ /dev/null @@ -1,13 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s - -! CHECK: not yet implemented: Unhandled clause NOGROUP in TASKLOOP construct -subroutine omp_taskloop_nogroup() - integer x - x = 0 - !$omp taskloop nogroup - do i = 1, 100 - x = x + 1 - end do - !$omp end taskloop -end subroutine omp_taskloop_nogroup diff --git a/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 b/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 deleted file mode 100644 index 0c16bd227257f..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 +++ /dev/null @@ -1,13 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s - -! CHECK: not yet implemented: Unhandled clause REDUCTION in TASKLOOP construct -subroutine omp_taskloop_reduction() - integer x - x = 0 - !$omp taskloop reduction(+:x) - do i = 1, 100 - x = x + 1 - end do - !$omp end taskloop -end subroutine omp_taskloop_reduction diff --git a/flang/test/Lower/OpenMP/taskloop.f90 b/flang/test/Lower/OpenMP/taskloop.f90 index 4a06e4def0c83..d23eef2d4ac2d 100644 --- a/flang/test/Lower/OpenMP/taskloop.f90 +++ b/flang/test/Lower/OpenMP/taskloop.f90 @@ -2,6 +2,15 @@ ! 
RUN: bbc -emit-hlfir %openmp_flags -o - %s 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir %openmp_flags -o - %s 2>&1 | FileCheck %s +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = private} @[[LAST_PRIVATE_I:.*]] : i32 + +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = private} @[[LAST_PRIVATE_X:.*]] : i32 + +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = private} @[[QFOMP_TASKLOOP_NOGROUPEI_PRIVATE_I32:.*]] : i32 + ! CHECK-LABEL: omp.private ! CHECK-SAME: {type = private} @[[OMP_TASKLOOP_UNTIEDEI_PRIVATE_I32:.*]] : i32 @@ -195,3 +204,46 @@ subroutine omp_taskloop_untied() end do !$omp end taskloop end subroutine + +!=============================================================================== +! `nogroup` clause +!=============================================================================== + +subroutine omp_taskloop_nogroup() + ! CHECK: omp.taskloop nogroup + !$omp taskloop nogroup + do i = 1, 10 + call foo() + end do + !$omp end taskloop +end subroutine + +!=============================================================================== +! `lastprivate` clause +!=============================================================================== + +! CHECK-LABEL: func.func @_QPomp_taskloop_lastprivate +! CHECK: %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_taskloop_lastprivateEi"} +! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFomp_taskloop_lastprivateEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFomp_taskloop_lastprivateEx"} +! CHECK: %[[DECL_X:.*]]:2 = hlfir.declare %[[ALLOCA_X]] {uniq_name = "_QFomp_taskloop_lastprivateEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +subroutine omp_taskloop_lastprivate() + integer x + x = 0 + ! CHECK: omp.taskloop private(@[[LAST_PRIVATE_X]] %[[DECL_X]]#0 -> %[[ARG0]], @[[LAST_PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1]] : !fir.ref, !fir.ref) { + !$omp taskloop lastprivate(x) + do i = 1, 100 + ! 
CHECK: %[[DECL_ARG0:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloop_lastprivateEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[LOAD_ARG0:.*]] = fir.load %[[DECL_ARG0]]#0 : !fir.ref + ! CHECK: %[[RES_ADD:.*]] = arith.addi %[[LOAD_ARG0]], %{{.*}} : i32 + ! CHECK: hlfir.assign %[[RES_ADD]] to %[[DECL_ARG0]]#0 : i32, !fir.ref + x = x + 1 + ! CHECK: %[[SELCT_RESULT:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : i1 + ! CHECK: fir.if %[[SELCT_RESULT]] { + ! CHECK: %[[LOADED_SUM:.*]] = fir.load %[[DECL_ARG0]]#0 : !fir.ref + ! CHECK: hlfir.assign %[[LOADED_SUM]] to %[[DECL_X]]#0 : i32, !fir.ref + ! CHECK: } + ! CHECK: omp.yield + end do + !$omp end taskloop +end subroutine omp_taskloop_lastprivate diff --git a/flang/test/Lower/taskloop-inreduction.f90 b/flang/test/Lower/taskloop-inreduction.f90 new file mode 100644 index 0000000000000..e7d3f96115fbd --- /dev/null +++ b/flang/test/Lower/taskloop-inreduction.f90 @@ -0,0 +1,40 @@ +! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s + +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = private} @[[PRIVATE_I:.*]] : i32 + +! CHECK-LABEL: omp.declare_reduction +! CHECK-SAME: @[[ADD_RED_I32:.*]] : i32 init { +! CHECK: ^bb0(%{{.*}}: i32): +! CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 +! CHECK: omp.yield(%[[C0_I32]] : i32) +! CHECK: } combiner { +! CHECK: ^bb0(%{{.*}}: i32, %{{.*}}: i32): +! CHECK: %[[RES:.*]] = arith.addi %{{.*}}, %{{.*}} : i32 +! CHECK: omp.yield(%[[RES]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPomp_taskloop_inreduction +! CHECK: %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_taskloop_inreductionEi"} +! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFomp_taskloop_inreductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[ALLOCA_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFomp_taskloop_inreductionEx"} +! CHECK: %[[DECL_X:.*]]:2 = hlfir.declare %[[ALLOCA_X]] {uniq_name = "_QFomp_taskloop_inreductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[INIT_X:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[INIT_X]] to %[[DECL_X]]#0 : i32, !fir.ref +subroutine omp_taskloop_inreduction() + integer x + x = 0 + ! CHECK: omp.taskloop in_reduction(@[[ADD_RED_I32]] + ! CHECK: %[[DECL_X]]#0 -> %[[ARG0:.*]] : !fir.ref) private(@[[PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref) { + ! CHECK: %[[VAL_ARG1:.*]]:2 = hlfir.declare %[[ARG0]] + ! CHECK-SAME: {uniq_name = "_QFomp_taskloop_inreductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) + !$omp taskloop in_reduction(+:x) + do i = 1, 100 + ! CHECK: %[[X_VAL:.*]] = fir.load %[[VAL_ARG1]]#0 : !fir.ref + ! CHECK: %[[ADD_VAL:.*]] = arith.addi %[[X_VAL]], %{{.*}} : i32 + x = x + 1 + ! CHECK: hlfir.assign %[[ADD_VAL]] to %[[VAL_ARG1]]#0 : i32, !fir.ref + end do + !$omp end taskloop +end subroutine omp_taskloop_inreduction diff --git a/flang/test/Lower/taskloop-reduction.f90 b/flang/test/Lower/taskloop-reduction.f90 new file mode 100644 index 0000000000000..e45c0181bcc8b --- /dev/null +++ b/flang/test/Lower/taskloop-reduction.f90 @@ -0,0 +1,39 @@ +! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s + +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = private} @[[PRIVATE_I:.*]] : i32 + +! CHECK-LABEL: omp.declare_reduction +! CHECK-SAME: @[[ADD_RED_I32:.*]] : i32 init { +! CHECK: ^bb0(%{{.*}}: i32): +! CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 +! CHECK: omp.yield(%[[C0_I32]] : i32) +! CHECK: } combiner { +! CHECK: ^bb0(%{{.*}}: i32, %{{.*}}: i32): +! CHECK: %[[RES:.*]] = arith.addi %{{.*}}, %{{.*}} : i32 +! CHECK: omp.yield(%[[RES]] : i32) +! CHECK: } + +! 
CHECK-LABEL: func.func @_QPomp_taskloop_reduction +! CHECK: %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_taskloop_reductionEi"} +! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFomp_taskloop_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFomp_taskloop_reductionEx"} +! CHECK: %[[DECL_X:.*]]:2 = hlfir.declare %[[ALLOCA_X]] {uniq_name = "_QFomp_taskloop_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[INIT_X:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[INIT_X]] to %[[DECL_X]]#0 : i32, !fir.ref +subroutine omp_taskloop_reduction() + integer x + x = 0 + ! CHECK: omp.taskloop private(@[[PRIVATE_I]] + ! CHECK-SAME: %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref) reduction(@[[ADD_RED_I32]] %[[DECL_X]]#0 -> %[[ARG1:.*]] : !fir.ref) { + ! CHECK: %[[VAL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] + !$omp taskloop reduction(+:x) + do i = 1, 100 + ! CHECK: %[[X_VAL:.*]] = fir.load %[[VAL_ARG1]]#0 : !fir.ref + ! CHECK: %[[ADD_VAL:.*]] = arith.addi %[[X_VAL]], %{{.*}} : i32 + x = x + 1 + ! 
CHECK: hlfir.assign %[[ADD_VAL]] to %[[VAL_ARG1]]#0 : i32, !fir.ref + end do + !$omp end taskloop +end subroutine omp_taskloop_reduction diff --git a/flang/test/Parser/OpenMP/dyn-groupprivate-clause.f90 b/flang/test/Parser/OpenMP/dyn-groupprivate-clause.f90 index 7d41efd348e50..599821dbe3377 100644 --- a/flang/test/Parser/OpenMP/dyn-groupprivate-clause.f90 +++ b/flang/test/Parser/OpenMP/dyn-groupprivate-clause.f90 @@ -26,21 +26,21 @@ subroutine f00(n) subroutine f01(n) implicit none integer :: n - !$omp target dyn_groupprivate(strict: n) + !$omp target dyn_groupprivate(fallback(abort): n) !$omp end target end !UNPARSE: SUBROUTINE f01 (n) !UNPARSE: IMPLICIT NONE !UNPARSE: INTEGER n -!UNPARSE: !$OMP TARGET DYN_GROUPPRIVATE(STRICT: n) +!UNPARSE: !$OMP TARGET DYN_GROUPPRIVATE(FALLBACK(ABORT): n) !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE !PARSE-TREE: OmpBeginDirective !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> DynGroupprivate -> OmpDynGroupprivateClause -!PARSE-TREE: | | Modifier -> OmpPrescriptiveness -> Value = Strict +!PARSE-TREE: | | Modifier -> OmpFallbackModifier -> Value = Abort !PARSE-TREE: | | Scalar -> Integer -> Expr = 'n' !PARSE-TREE: | | | Designator -> DataRef -> Name = 'n' !PARSE-TREE: | Flags = None @@ -49,21 +49,21 @@ subroutine f01(n) subroutine f02(n) implicit none integer :: n - !$omp target dyn_groupprivate(fallback, cgroup: n) + !$omp target dyn_groupprivate(fallback(default_mem), cgroup: n) !$omp end target end !UNPARSE: SUBROUTINE f02 (n) !UNPARSE: IMPLICIT NONE !UNPARSE: INTEGER n -!UNPARSE: !$OMP TARGET DYN_GROUPPRIVATE(FALLBACK, CGROUP: n) +!UNPARSE: !$OMP TARGET DYN_GROUPPRIVATE(FALLBACK(DEFAULT_MEM), CGROUP: n) !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE !PARSE-TREE: OmpBeginDirective !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> DynGroupprivate -> OmpDynGroupprivateClause -!PARSE-TREE: | | 
Modifier -> OmpPrescriptiveness -> Value = Fallback +!PARSE-TREE: | | Modifier -> OmpFallbackModifier -> Value = Default_Mem !PARSE-TREE: | | Modifier -> OmpAccessGroup -> Value = Cgroup !PARSE-TREE: | | Scalar -> Integer -> Expr = 'n' !PARSE-TREE: | | | Designator -> DataRef -> Name = 'n' diff --git a/flang/test/Semantics/OpenMP/dyn-groupprivate.f90 b/flang/test/Semantics/OpenMP/dyn-groupprivate.f90 new file mode 100644 index 0000000000000..f77a0b0d35f44 --- /dev/null +++ b/flang/test/Semantics/OpenMP/dyn-groupprivate.f90 @@ -0,0 +1,8 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=61 + +subroutine f00(x) + integer :: x + !ERROR: The access-group modifier can only occur on a single clause in a construct + !$omp target dyn_groupprivate(cgroup: x), dyn_groupprivate(10) + !$omp end target +end diff --git a/flang/test/Semantics/OpenMP/in-reduction.f90 b/flang/test/Semantics/OpenMP/in-reduction.f90 index 1b82134b7104b..3f1e735214061 100644 --- a/flang/test/Semantics/OpenMP/in-reduction.f90 +++ b/flang/test/Semantics/OpenMP/in-reduction.f90 @@ -47,6 +47,7 @@ subroutine f06 integer :: a(10) end type type(t) :: x +!ERROR: A variable that is part of another variable cannot appear on the IN_REDUCTION clause !ERROR: The base expression of an array element or section in IN_REDUCTION clause must be an identifier !$omp target in_reduction(+: x%a(2)) !$omp end target @@ -57,6 +58,7 @@ subroutine f07 integer :: a(10) end type type(t) :: x +!ERROR: A variable that is part of another variable cannot appear on the IN_REDUCTION clause !ERROR: The base expression of an array element or section in IN_REDUCTION clause must be an identifier !$omp target in_reduction(+: x%a(1:10)) !$omp end target diff --git a/flang/test/Semantics/OpenMP/reduction15.f90 b/flang/test/Semantics/OpenMP/reduction15.f90 index 1d4de6ff702bb..61fa417f1111c 100644 --- a/flang/test/Semantics/OpenMP/reduction15.f90 +++ b/flang/test/Semantics/OpenMP/reduction15.f90 @@ -13,6 +13,7 @@ module 
m subroutine f00 type(t) :: x + !ERROR: A variable that is part of another variable cannot appear on the REDUCTION clause !ERROR: The base expression of an array element or section in REDUCTION clause must be an identifier !$omp do reduction (+ : x%a(2)) do i = 1, 10 @@ -22,6 +23,7 @@ subroutine f00 subroutine f01 type(t) :: x + !ERROR: A variable that is part of another variable cannot appear on the REDUCTION clause !ERROR: The base expression of an array element or section in REDUCTION clause must be an identifier !$omp do reduction (+ : x%a(1:10)) do i = 1, 10 diff --git a/flang/test/Semantics/OpenMP/reduction17.f90 b/flang/test/Semantics/OpenMP/reduction17.f90 new file mode 100644 index 0000000000000..5b6e8e977f46c --- /dev/null +++ b/flang/test/Semantics/OpenMP/reduction17.f90 @@ -0,0 +1,18 @@ +! Test that Structure Component Array Elements are caught by Semantics and return an error +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=45 + +type test_type + integer :: array(2) +end type + +contains + subroutine test + type(test_type) :: x + + !ERROR: A variable that is part of another variable cannot appear on the REDUCTION clause + !$omp do reduction(+: x%array(2)) + do i=1, 2 + end do + !$omp end do + end subroutine +end diff --git a/flang/test/Semantics/OpenMP/task-reduction.f90 b/flang/test/Semantics/OpenMP/task-reduction.f90 index 5a18ee48e7728..f76b07ae568f4 100644 --- a/flang/test/Semantics/OpenMP/task-reduction.f90 +++ b/flang/test/Semantics/OpenMP/task-reduction.f90 @@ -47,6 +47,7 @@ subroutine f06 integer :: a(10) end type type(t) :: x +!ERROR: A variable that is part of another variable cannot appear on the TASK_REDUCTION clause !ERROR: The base expression of an array element or section in TASK_REDUCTION clause must be an identifier !$omp taskgroup task_reduction(+: x%a(2)) !$omp end taskgroup @@ -57,6 +58,7 @@ subroutine f07 integer :: a(10) end type type(t) :: x +!ERROR: A variable that is part of another variable cannot 
appear on the TASK_REDUCTION clause !ERROR: The base expression of an array element or section in TASK_REDUCTION clause must be an identifier !$omp taskgroup task_reduction(+: x%a(1:10)) !$omp end taskgroup diff --git a/flang/tools/flang-driver/CMakeLists.txt b/flang/tools/flang-driver/CMakeLists.txt index 801fc324e888d..3caec17b3edbb 100644 --- a/flang/tools/flang-driver/CMakeLists.txt +++ b/flang/tools/flang-driver/CMakeLists.txt @@ -26,6 +26,7 @@ target_link_libraries(flang clang_target_link_libraries(flang PRIVATE clangDriver + clangOptions clangBasic ) diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp index bd878b7a642f1..0840255a739f3 100644 --- a/flang/tools/flang-driver/driver.cpp +++ b/flang/tools/flang-driver/driver.cpp @@ -52,9 +52,9 @@ createAndPopulateDiagOpts(llvm::ArrayRef argv) { // Any errors that would be diagnosed here will also be diagnosed later, // when the DiagnosticsEngine actually exists. unsigned missingArgIndex, missingArgCount; - llvm::opt::InputArgList args = clang::driver::getDriverOptTable().ParseArgs( + llvm::opt::InputArgList args = clang::getDriverOptTable().ParseArgs( argv.slice(1), missingArgIndex, missingArgCount, - llvm::opt::Visibility(clang::driver::options::FlangOption)); + llvm::opt::Visibility(clang::options::FlangOption)); (void)Fortran::frontend::parseDiagnosticArgs(*diagOpts, args); diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 3845ec8376794..6d3036dfedddf 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -859,7 +859,6 @@ set(files __type_traits/is_reference.h __type_traits/is_reference_wrapper.h __type_traits/is_referenceable.h - __type_traits/is_replaceable.h __type_traits/is_same.h __type_traits/is_scalar.h __type_traits/is_signed.h diff --git a/libcxx/include/__exception/exception_ptr.h b/libcxx/include/__exception/exception_ptr.h index aef036a2c9586..92ff5c701e0d3 100644 --- 
a/libcxx/include/__exception/exception_ptr.h +++ b/libcxx/include/__exception/exception_ptr.h @@ -75,9 +75,7 @@ class _LIBCPP_EXPORTED_FROM_ABI exception_ptr { public: // exception_ptr is basically a COW string so it is trivially relocatable. - // It is also replaceable because assignment has normal value semantics. using __trivially_relocatable _LIBCPP_NODEBUG = exception_ptr; - using __replaceable _LIBCPP_NODEBUG = exception_ptr; _LIBCPP_HIDE_FROM_ABI exception_ptr() _NOEXCEPT : __ptr_() {} _LIBCPP_HIDE_FROM_ABI exception_ptr(nullptr_t) _NOEXCEPT : __ptr_() {} diff --git a/libcxx/include/__expected/expected.h b/libcxx/include/__expected/expected.h index 8b3eeebd38ae7..be37e8ab66ac4 100644 --- a/libcxx/include/__expected/expected.h +++ b/libcxx/include/__expected/expected.h @@ -30,7 +30,6 @@ #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_reference.h> -#include <__type_traits/is_replaceable.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_constructible.h> @@ -472,8 +471,6 @@ class expected : private __expected_base<_Tp, _Err> { __conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value && __libcpp_is_trivially_relocatable<_Err>::value, expected, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v<_Tp> && __is_replaceable_v<_Err>, expected, void>; template using rebind = expected<_Up, error_type>; diff --git a/libcxx/include/__locale b/libcxx/include/__locale index eb7b7786208e8..0948bd29b6f1b 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -57,9 +57,8 @@ _LIBCPP_HIDE_FROM_ABI const _Facet& use_facet(const locale&); class _LIBCPP_EXPORTED_FROM_ABI locale { public: // locale is essentially a shared_ptr that doesn't support weak_ptrs and never got a move constructor, - // so it is trivially relocatable. Like shared_ptr, it is also replaceable. 
+ // so it is trivially relocatable. using __trivially_relocatable _LIBCPP_NODEBUG = locale; - using __replaceable _LIBCPP_NODEBUG = locale; // types: class _LIBCPP_EXPORTED_FROM_ABI facet; diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index e90db587d2836..67b94114988b5 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -317,10 +317,8 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI shared_ptr { #endif // A shared_ptr contains only two raw pointers which point to the heap and move constructing already doesn't require - // any bookkeeping, so it's always trivially relocatable. It is also replaceable because assignment just rebinds the - // shared_ptr to manage a different object. + // any bookkeeping, so it's always trivially relocatable. using __trivially_relocatable _LIBCPP_NODEBUG = shared_ptr; - using __replaceable _LIBCPP_NODEBUG = shared_ptr; private: element_type* __ptr_; @@ -1186,9 +1184,8 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI weak_ptr { #endif // A weak_ptr contains only two raw pointers which point to the heap and move constructing already doesn't require - // any bookkeeping, so it's always trivially relocatable. It's also replaceable for the same reason. + // any bookkeeping, so it's always trivially relocatable. 
using __trivially_relocatable _LIBCPP_NODEBUG = weak_ptr; - using __replaceable _LIBCPP_NODEBUG = weak_ptr; private: element_type* __ptr_; diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h index eff24546cdc01..491d1c2e42417 100644 --- a/libcxx/include/__memory/unique_ptr.h +++ b/libcxx/include/__memory/unique_ptr.h @@ -39,7 +39,6 @@ #include <__type_traits/is_function.h> #include <__type_traits/is_pointer.h> #include <__type_traits/is_reference.h> -#include <__type_traits/is_replaceable.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_relocatable.h> @@ -145,8 +144,6 @@ class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI unique_ptr { __libcpp_is_trivially_relocatable::value && __libcpp_is_trivially_relocatable::value, unique_ptr, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __is_replaceable_v, unique_ptr, void>; private: _LIBCPP_COMPRESSED_PAIR(pointer, __ptr_, deleter_type, __deleter_); @@ -413,8 +410,6 @@ class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI unique_ptr<_Tp[], _Dp> { __libcpp_is_trivially_relocatable::value && __libcpp_is_trivially_relocatable::value, unique_ptr, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __is_replaceable_v, unique_ptr, void>; private: template diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index 15368a3bc8955..1e05e4df8ba0f 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -30,7 +30,6 @@ #include <__type_traits/integral_constant.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_replaceable.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_destructible.h> #include <__type_traits/is_trivially_relocatable.h> @@ -484,10 +483,6 @@ public: __libcpp_is_trivially_relocatable::value && 
__libcpp_is_trivially_relocatable::value, __split_buffer, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __container_allocator_is_replaceable<__alloc_traits>::value, - __split_buffer, - void>; __split_buffer(const __split_buffer&) = delete; __split_buffer& operator=(const __split_buffer&) = delete; diff --git a/libcxx/include/__type_traits/is_replaceable.h b/libcxx/include/__type_traits/is_replaceable.h deleted file mode 100644 index e1d17c099cd3a..0000000000000 --- a/libcxx/include/__type_traits/is_replaceable.h +++ /dev/null @@ -1,61 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___TYPE_TRAITS_IS_REPLACEABLE_H -#define _LIBCPP___TYPE_TRAITS_IS_REPLACEABLE_H - -#include <__config> -#include <__type_traits/enable_if.h> -#include <__type_traits/integral_constant.h> -#include <__type_traits/is_same.h> -#include <__type_traits/is_trivially_copyable.h> - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -// A type is replaceable if, with `x` and `y` being different objects, `x = std::move(y)` is equivalent to: -// -// std::destroy_at(&x) -// std::construct_at(&x, std::move(y)) -// -// This allows turning a move-assignment into a sequence of destroy + move-construct, which -// is often more efficient. This is especially relevant when the move-construct is in fact -// part of a trivial relocation from somewhere else, in which case there is a huge win. -// -// Note that this requires language support in order to be really effective, but we -// currently emulate the base template with something very conservative. 
-template -struct __is_replaceable : is_trivially_copyable<_Tp> {}; - -template -struct __is_replaceable<_Tp, __enable_if_t::value> > : true_type {}; - -template -inline const bool __is_replaceable_v = __is_replaceable<_Tp>::value; - -// Determines whether an allocator member of a container is replaceable. -// -// First, we require the allocator type to be considered replaceable. If not, then something fishy might be -// happening. Assuming the allocator type is replaceable, we conclude replaceability of the allocator as a -// member of the container if the allocator always compares equal (in which case propagation doesn't matter), -// or if the allocator always propagates on assignment, which is required in order for move construction and -// assignment to be equivalent. -template -struct __container_allocator_is_replaceable - : integral_constant && - (_AllocatorTraits::is_always_equal::value || - (_AllocatorTraits::propagate_on_container_move_assignment::value && - _AllocatorTraits::propagate_on_container_copy_assignment::value))> {}; - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___TYPE_TRAITS_IS_REPLACEABLE_H diff --git a/libcxx/include/__utility/pair.h b/libcxx/include/__utility/pair.h index 33694c52430f1..61485123114ba 100644 --- a/libcxx/include/__utility/pair.h +++ b/libcxx/include/__utility/pair.h @@ -31,8 +31,6 @@ #include <__type_traits/is_implicitly_default_constructible.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_replaceable.h> -#include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_relocatable.h> #include <__type_traits/nat.h> @@ -102,7 +100,6 @@ struct pair __conditional_t<__libcpp_is_trivially_relocatable<_T1>::value && __libcpp_is_trivially_relocatable<_T2>::value, pair, void>; - using __replaceable _LIBCPP_NODEBUG = __conditional_t<__is_replaceable_v<_T1> && __is_replaceable_v<_T2>, pair, void>; 
_LIBCPP_HIDE_FROM_ABI pair(pair const&) = default; _LIBCPP_HIDE_FROM_ABI pair(pair&&) = default; diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h index a100b1675516e..7051e044314ea 100644 --- a/libcxx/include/__vector/vector.h +++ b/libcxx/include/__vector/vector.h @@ -54,7 +54,6 @@ #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_pointer.h> -#include <__type_traits/is_replaceable.h> #include <__type_traits/is_same.h> #include <__type_traits/is_trivially_relocatable.h> #include <__type_traits/type_identity.h> @@ -123,10 +122,6 @@ class vector { __libcpp_is_trivially_relocatable::value && __libcpp_is_trivially_relocatable::value, vector, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __container_allocator_is_replaceable<__alloc_traits>::value, - vector, - void>; static_assert(__check_valid_allocator::value, ""); static_assert(is_same::value, diff --git a/libcxx/include/array b/libcxx/include/array index 9643fc1dd9dca..ff46838e2e8e2 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -134,7 +134,6 @@ template const T&& get(const array&&) noexce # include <__type_traits/is_const.h> # include <__type_traits/is_constructible.h> # include <__type_traits/is_nothrow_constructible.h> -# include <__type_traits/is_replaceable.h> # include <__type_traits/is_same.h> # include <__type_traits/is_swappable.h> # include <__type_traits/is_trivially_relocatable.h> @@ -176,7 +175,6 @@ template struct array { using __trivially_relocatable _LIBCPP_NODEBUG = __conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value, array, void>; - using __replaceable _LIBCPP_NODEBUG = __conditional_t<__is_replaceable_v<_Tp>, array, void>; // types: using __self _LIBCPP_NODEBUG = array; diff --git a/libcxx/include/deque b/libcxx/include/deque index cbf4b98e07a5b..08bf8141eb782 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ 
-227,7 +227,6 @@ template # include <__type_traits/is_convertible.h> # include <__type_traits/is_nothrow_assignable.h> # include <__type_traits/is_nothrow_constructible.h> -# include <__type_traits/is_replaceable.h> # include <__type_traits/is_same.h> # include <__type_traits/is_swappable.h> # include <__type_traits/is_trivially_relocatable.h> @@ -531,10 +530,6 @@ public: __libcpp_is_trivially_relocatable<__map>::value && __libcpp_is_trivially_relocatable::value, deque, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v<__map> && __container_allocator_is_replaceable<__alloc_traits>::value, - deque, - void>; static_assert(is_nothrow_default_constructible::value == is_nothrow_default_constructible<__pointer_allocator>::value, diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 85fe88afe248c..27f60e0c0a055 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -271,10 +271,6 @@ module std_core [system] { header "__type_traits/is_referenceable.h" export std_core.type_traits.integral_constant } - module is_replaceable { - header "__type_traits/is_replaceable.h" - export std_core.type_traits.integral_constant - } module is_same { header "__type_traits/is_same.h" export std_core.type_traits.integral_constant diff --git a/libcxx/include/optional b/libcxx/include/optional index ef1bfd3ec44c0..a3023622e2067 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -230,7 +230,6 @@ namespace std { # include <__type_traits/is_nothrow_constructible.h> # include <__type_traits/is_object.h> # include <__type_traits/is_reference.h> -# include <__type_traits/is_replaceable.h> # include <__type_traits/is_same.h> # include <__type_traits/is_scalar.h> # include <__type_traits/is_swappable.h> @@ -631,7 +630,6 @@ public: # endif using __trivially_relocatable _LIBCPP_NODEBUG = conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value, optional, void>; - using 
__replaceable _LIBCPP_NODEBUG = conditional_t<__is_replaceable_v<_Tp>, optional, void>; private: // Disable the reference extension using this static assert. diff --git a/libcxx/include/string b/libcxx/include/string index ede42467b99fe..7a247f5459770 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -632,7 +632,6 @@ basic_string operator""s( const char32_t *str, size_t len ); # include <__type_traits/is_generic_transparent_comparator.h> # include <__type_traits/is_nothrow_assignable.h> # include <__type_traits/is_nothrow_constructible.h> -# include <__type_traits/is_replaceable.h> # include <__type_traits/is_same.h> # include <__type_traits/is_standard_layout.h> # include <__type_traits/is_trivially_constructible.h> @@ -757,9 +756,6 @@ public: // external memory. In such cases, the destructor is responsible for unpoisoning // the memory to avoid triggering false positives. // Therefore it's crucial to ensure the destructor is called. - // - // However, it is replaceable since implementing move-assignment as a destroy + move-construct - // will maintain the right ASAN state. 
using __trivially_relocatable = void; # else using __trivially_relocatable _LIBCPP_NODEBUG = __conditional_t< @@ -767,10 +763,6 @@ public: basic_string, void>; # endif - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __container_allocator_is_replaceable<__alloc_traits>::value, - basic_string, - void>; # if __has_feature(address_sanitizer) && _LIBCPP_INSTRUMENTED_WITH_ASAN _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pointer __asan_volatile_wrapper(pointer const& __ptr) const { diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 0cfcd9a4fd9c5..caa473012a7c4 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -252,7 +252,6 @@ template # include <__type_traits/is_nothrow_assignable.h> # include <__type_traits/is_nothrow_constructible.h> # include <__type_traits/is_reference.h> -# include <__type_traits/is_replaceable.h> # include <__type_traits/is_same.h> # include <__type_traits/is_swappable.h> # include <__type_traits/is_trivially_relocatable.h> @@ -596,7 +595,6 @@ class _LIBCPP_NO_SPECIALIZATIONS tuple { public: using __trivially_relocatable _LIBCPP_NODEBUG = __conditional_t<_And<__libcpp_is_trivially_relocatable<_Tp>...>::value, tuple, void>; - using __replaceable _LIBCPP_NODEBUG = __conditional_t<_And<__is_replaceable<_Tp>...>::value, tuple, void>; // [tuple.cnstr] diff --git a/libcxx/include/variant b/libcxx/include/variant index 8e958581a6b07..df587ccf23843 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -247,7 +247,6 @@ namespace std { # include <__type_traits/is_nothrow_assignable.h> # include <__type_traits/is_nothrow_constructible.h> # include <__type_traits/is_reference.h> -# include <__type_traits/is_replaceable.h> # include <__type_traits/is_same.h> # include <__type_traits/is_swappable.h> # include <__type_traits/is_trivially_assignable.h> @@ -1172,7 +1171,6 @@ class _LIBCPP_DECLSPEC_EMPTY_BASES _LIBCPP_NO_SPECIALIZATIONS variant public: using __trivially_relocatable 
_LIBCPP_NODEBUG = conditional_t<_And<__libcpp_is_trivially_relocatable<_Types>...>::value, variant, void>; - using __replaceable _LIBCPP_NODEBUG = conditional_t<_And<__is_replaceable<_Types>...>::value, variant, void>; template , _Dummy>::value, int> = 0> diff --git a/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp b/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp deleted file mode 100644 index c04e9443c8e67..0000000000000 --- a/libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp +++ /dev/null @@ -1,353 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <__type_traits/is_replaceable.h> -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "constexpr_char_traits.h" -#include "test_allocator.h" -#include "test_macros.h" - -#ifndef TEST_HAS_NO_LOCALIZATION -# include -#endif - -template -struct NonPropagatingStatefulMoveAssignAlloc : std::allocator { - using propagate_on_container_move_assignment = std::false_type; - using is_always_equal = std::false_type; - template - struct rebind { - using other = NonPropagatingStatefulMoveAssignAlloc; - }; -}; - -template -struct NonPropagatingStatefulCopyAssignAlloc : std::allocator { - using propagate_on_container_copy_assignment = std::false_type; - using is_always_equal = std::false_type; - template - struct rebind { - using other = NonPropagatingStatefulCopyAssignAlloc; - }; -}; - -template -struct NonPropagatingStatelessMoveAssignAlloc : std::allocator { - using propagate_on_container_move_assignment = std::false_type; - using is_always_equal = std::true_type; - template - 
struct rebind { - using other = NonPropagatingStatelessMoveAssignAlloc; - }; -}; - -template -struct NonPropagatingStatelessCopyAssignAlloc : std::allocator { - using propagate_on_container_copy_assignment = std::false_type; - using is_always_equal = std::true_type; - template - struct rebind { - using other = NonPropagatingStatelessCopyAssignAlloc; - }; -}; - -template -struct NonReplaceableStatelessAlloc : std::allocator { - // Ensure that we don't consider an allocator that is a member of a container to be - // replaceable if it's not replaceable, even if it always compares equal and always propagates. - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_copy_assignment = std::true_type; - using is_always_equal = std::true_type; - NonReplaceableStatelessAlloc() = default; - NonReplaceableStatelessAlloc(NonReplaceableStatelessAlloc const&) {} - NonReplaceableStatelessAlloc(NonReplaceableStatelessAlloc&&) = default; - template - struct rebind { - using other = NonReplaceableStatelessAlloc; - }; -}; -static_assert(!std::__is_replaceable >::value, ""); - -static_assert(!std::__is_replaceable >::value, ""); // we use that property below - -struct Empty {}; -static_assert(std::__is_replaceable::value, ""); -static_assert(std::__is_replaceable::value, ""); -static_assert(std::__is_replaceable::value, ""); -static_assert(std::__is_replaceable::value, ""); - -struct TriviallyCopyable { - char c; - int i; - Empty s; -}; -static_assert(std::__is_replaceable::value, ""); - -struct NotTriviallyCopyable { - NotTriviallyCopyable(const NotTriviallyCopyable&); - ~NotTriviallyCopyable(); -}; -static_assert(!std::__is_replaceable::value, ""); - -struct MoveOnlyTriviallyCopyable { - MoveOnlyTriviallyCopyable(const MoveOnlyTriviallyCopyable&) = delete; - MoveOnlyTriviallyCopyable& operator=(const MoveOnlyTriviallyCopyable&) = delete; - MoveOnlyTriviallyCopyable(MoveOnlyTriviallyCopyable&&) = default; - MoveOnlyTriviallyCopyable& 
operator=(MoveOnlyTriviallyCopyable&&) = default; -}; -static_assert(std::__is_replaceable::value, ""); - -struct CustomCopyAssignment { - CustomCopyAssignment(const CustomCopyAssignment&) = default; - CustomCopyAssignment(CustomCopyAssignment&&) = default; - CustomCopyAssignment& operator=(const CustomCopyAssignment&); - CustomCopyAssignment& operator=(CustomCopyAssignment&&) = default; -}; -static_assert(!std::__is_replaceable::value, ""); - -struct CustomMoveAssignment { - CustomMoveAssignment(const CustomMoveAssignment&) = default; - CustomMoveAssignment(CustomMoveAssignment&&) = default; - CustomMoveAssignment& operator=(const CustomMoveAssignment&) = default; - CustomMoveAssignment& operator=(CustomMoveAssignment&&); -}; -static_assert(!std::__is_replaceable::value, ""); - -// library-internal types -// ---------------------- - -// __split_buffer -static_assert( - std::__is_replaceable, std::__split_buffer_pointer_layout> >::value, - ""); -static_assert(std::__is_replaceable, - std::__split_buffer_pointer_layout> >::value, - ""); -static_assert( - !std::__is_replaceable< - std::__split_buffer, std::__split_buffer_pointer_layout > >:: - value, - ""); -static_assert( - !std::__is_replaceable< - std::__split_buffer, std::__split_buffer_pointer_layout > >:: - value, - ""); -static_assert( - std::__is_replaceable< - std::__split_buffer, std::__split_buffer_pointer_layout > >:: - value, - ""); -static_assert( - std::__is_replaceable< - std::__split_buffer, std::__split_buffer_pointer_layout > >:: - value, - ""); - -static_assert( - std::__is_replaceable, std::__split_buffer_size_layout> >::value, ""); -static_assert(std::__is_replaceable, - std::__split_buffer_size_layout> >::value, - ""); -static_assert( - !std::__is_replaceable< - std::__split_buffer, std::__split_buffer_size_layout > >::value, - ""); -static_assert( - !std::__is_replaceable< - std::__split_buffer, std::__split_buffer_size_layout > >::value, - ""); -static_assert( - std::__is_replaceable< - 
std::__split_buffer, std::__split_buffer_size_layout > >:: - value, - ""); -static_assert( - std::__is_replaceable< - std::__split_buffer, std::__split_buffer_size_layout > >:: - value, - ""); - -// standard library types -// ---------------------- - -// array -static_assert(std::__is_replaceable >::value, ""); -static_assert(std::__is_replaceable >::value, ""); -static_assert(std::__is_replaceable, 0> >::value, ""); - -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(std::__is_replaceable, 1> >::value, ""); - -// basic_string -struct MyChar { - char c; -}; -template -struct NotReplaceableCharTraits : constexpr_char_traits { - NotReplaceableCharTraits(const NotReplaceableCharTraits&); - NotReplaceableCharTraits& operator=(const NotReplaceableCharTraits&); - ~NotReplaceableCharTraits(); -}; - -static_assert(std::__is_replaceable, std::allocator > >::value, - ""); -static_assert( - std::__is_replaceable, std::allocator > >::value, ""); -static_assert( - std::__is_replaceable, std::allocator > >::value, - ""); -static_assert(!std::__is_replaceable, test_allocator > >::value, - ""); -static_assert(!std::__is_replaceable< - std::basic_string, NonReplaceableStatelessAlloc > >::value, - ""); -static_assert(std::__is_replaceable< - std::basic_string, std::allocator > >::value, - ""); -static_assert( - !std::__is_replaceable< - std::basic_string, NonPropagatingStatefulCopyAssignAlloc > >::value, - ""); -static_assert( - !std::__is_replaceable< - std::basic_string, NonPropagatingStatefulMoveAssignAlloc > >::value, - ""); -static_assert( - std::__is_replaceable< - std::basic_string, NonPropagatingStatelessCopyAssignAlloc > >::value, - ""); -static_assert( - std::__is_replaceable< - std::basic_string, NonPropagatingStatelessMoveAssignAlloc > >::value, - ""); - -// deque -static_assert(std::__is_replaceable >::value, ""); -static_assert(std::__is_replaceable >::value, ""); 
-static_assert(!std::__is_replaceable > >::value, ""); -static_assert(!std::__is_replaceable > >::value, ""); -static_assert(!std::__is_replaceable > >::value, ""); -static_assert(!std::__is_replaceable > >::value, ""); -static_assert(std::__is_replaceable > >::value, ""); -static_assert(std::__is_replaceable > >::value, ""); - -// exception_ptr -#ifndef _LIBCPP_ABI_MICROSOFT -static_assert(std::__is_replaceable::value, ""); -#endif - -// expected -#if TEST_STD_VER >= 23 -static_assert(std::__is_replaceable >::value); -static_assert(!std::__is_replaceable>::value); -static_assert(!std::__is_replaceable>::value); -static_assert(!std::__is_replaceable>::value); -#endif - -// locale -#ifndef TEST_HAS_NO_LOCALIZATION -static_assert(std::__is_replaceable::value, ""); -#endif - -// optional -#if TEST_STD_VER >= 17 -static_assert(std::__is_replaceable>::value, ""); -static_assert(!std::__is_replaceable>::value, ""); -#endif - -// pair -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); - -// shared_ptr -static_assert(std::__is_replaceable >::value, ""); - -// tuple -#if TEST_STD_VER >= 11 -static_assert(std::__is_replaceable >::value, ""); - -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); - -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -#endif // TEST_STD_VER >= 11 - -// unique_ptr -struct NonReplaceableDeleter { - NonReplaceableDeleter(const NonReplaceableDeleter&); - NonReplaceableDeleter& operator=(const NonReplaceableDeleter&); - ~NonReplaceableDeleter(); - - template - void operator()(T*); -}; - -struct NonReplaceablePointer { - struct pointer { - pointer(const pointer&); - pointer& 
operator=(const pointer&); - ~pointer(); - }; - - template - void operator()(T*); -}; - -static_assert(std::__is_replaceable >::value, ""); -static_assert(std::__is_replaceable >::value, ""); -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); - -// variant -#if TEST_STD_VER >= 17 -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); - -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable >::value, ""); -#endif // TEST_STD_VER >= 17 - -// vector -static_assert(std::__is_replaceable >::value, ""); -static_assert(std::__is_replaceable >::value, ""); -static_assert(!std::__is_replaceable > >::value, ""); -static_assert(!std::__is_replaceable > >::value, ""); -static_assert(!std::__is_replaceable > >::value, ""); -static_assert(!std::__is_replaceable > >::value, ""); -static_assert(std::__is_replaceable > >::value, ""); -static_assert(std::__is_replaceable > >::value, ""); - -// weak_ptr -static_assert(std::__is_replaceable >::value, ""); - -// TODO: Mark all the replaceable STL types as such diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py deleted file mode 100644 index 5da1d9afee911..0000000000000 --- a/libcxx/utils/libcxx/test/features.py +++ /dev/null @@ -1,920 +0,0 @@ -# ===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ===----------------------------------------------------------------------===## - -from libcxx.test.dsl import * -from lit.BooleanExpression import BooleanExpression -import re -import shutil -import subprocess -import sys - -_isAnyClang = lambda cfg: "__clang__" in compilerMacros(cfg) -_isAppleClang = lambda cfg: "__apple_build_version__" in compilerMacros(cfg) -_isAnyGCC = lambda cfg: "__GNUC__" in compilerMacros(cfg) -_isClang = lambda cfg: _isAnyClang(cfg) and not _isAppleClang(cfg) -_isGCC = lambda cfg: _isAnyGCC(cfg) and not _isAnyClang(cfg) -_isAnyClangOrGCC = lambda cfg: _isAnyClang(cfg) or _isAnyGCC(cfg) -_isClExe = lambda cfg: not _isAnyClangOrGCC(cfg) -_isMSVC = lambda cfg: "_MSC_VER" in compilerMacros(cfg) -_msvcVersion = lambda cfg: (int(compilerMacros(cfg)["_MSC_VER"]) // 100, int(compilerMacros(cfg)["_MSC_VER"]) % 100) - -def _getAndroidDeviceApi(cfg): - return int( - programOutput( - cfg, - r""" - #include - #include - int main(int, char**) { - printf("%d\n", android_get_device_api_level()); - return 0; - } - """, - ) - ) - - -def _mingwSupportsModules(cfg): - # Only mingw headers are known to work with libc++ built as a module, - # at the moment. - if not "__MINGW32__" in compilerMacros(cfg): - return False - # For mingw headers, check for a version known to support being built - # as a module. - return sourceBuilds( - cfg, - """ - #include <_mingw_mac.h> - #if __MINGW64_VERSION_MAJOR < 12 - #error Headers known to be incompatible - #elif __MINGW64_VERSION_MAJOR == 12 - // The headers were fixed to work with libc++ modules during - // __MINGW64_VERSION_MAJOR == 12. The headers became compatible - // with libc++ built as a module in - // 1652e9241b5d8a5a779c6582b1c3c4f4a7cc66e5 (Apr 2024), but the - // following commit 8c13b28ace68f2c0094d45121d59a4b951b533ed - // removed the now unused __mingw_static_ovr define. Use this - // as indicator for whether we've got new enough headers. 
- #ifdef __mingw_static_ovr - #error Headers too old - #endif - #else - // __MINGW64_VERSION_MAJOR > 12 should be ok. - #endif - int main(int, char**) { return 0; } - """, - ) - - -# Lit features are evaluated in order. Some checks may require the compiler detection to have -# run first in order to work properly. -DEFAULT_FEATURES = [ - # gcc-style-warnings detects compilers that understand -Wno-meow flags, unlike MSVC's compiler driver cl.exe. - Feature(name="gcc-style-warnings", when=_isAnyClangOrGCC), - Feature(name="cl-style-warnings", when=_isClExe), - Feature(name="apple-clang", when=_isAppleClang), - Feature( - name=lambda cfg: "apple-clang-{__clang_major__}".format(**compilerMacros(cfg)), - when=_isAppleClang, - ), - Feature( - name=lambda cfg: "apple-clang-{__clang_major__}.{__clang_minor__}".format(**compilerMacros(cfg)), - when=_isAppleClang, - ), - Feature( - name=lambda cfg: "apple-clang-{__clang_major__}.{__clang_minor__}.{__clang_patchlevel__}".format(**compilerMacros(cfg)), - when=_isAppleClang, - ), - Feature(name="clang", when=_isClang), - Feature( - name=lambda cfg: "clang-{__clang_major__}".format(**compilerMacros(cfg)), - when=_isClang, - ), - Feature( - name=lambda cfg: "clang-{__clang_major__}.{__clang_minor__}".format(**compilerMacros(cfg)), - when=_isClang, - ), - Feature( - name=lambda cfg: "clang-{__clang_major__}.{__clang_minor__}.{__clang_patchlevel__}".format(**compilerMacros(cfg)), - when=_isClang, - ), - # Note: Due to a GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104760), we must disable deprecation warnings - # on GCC or spurious diagnostics are issued. - # - # TODO: - # - Enable -Wplacement-new with GCC. - # - Enable -Wclass-memaccess with GCC. 
- Feature( - name="gcc", - when=_isGCC, - actions=[ - AddCompileFlag("-D_LIBCPP_DISABLE_DEPRECATION_WARNINGS"), - AddCompileFlag("-Wno-placement-new"), - AddCompileFlag("-Wno-class-memaccess"), - AddFeature("GCC-ALWAYS_INLINE-FIXME"), - ], - ), - Feature( - name=lambda cfg: "gcc-{__GNUC__}".format(**compilerMacros(cfg)), when=_isGCC - ), - Feature( - name=lambda cfg: "gcc-{__GNUC__}.{__GNUC_MINOR__}".format(**compilerMacros(cfg)), - when=_isGCC, - ), - Feature( - name=lambda cfg: "gcc-{__GNUC__}.{__GNUC_MINOR__}.{__GNUC_PATCHLEVEL__}".format(**compilerMacros(cfg)), - when=_isGCC, - ), - Feature(name="msvc", when=_isMSVC), - Feature(name=lambda cfg: "msvc-{}".format(*_msvcVersion(cfg)), when=_isMSVC), - Feature(name=lambda cfg: "msvc-{}.{}".format(*_msvcVersion(cfg)), when=_isMSVC), - - Feature( - name="diagnose-if-support", - when=lambda cfg: hasCompileFlag(cfg, "-Wuser-defined-warnings"), - actions=[AddCompileFlag("-Wuser-defined-warnings")], - ), - Feature( - name="character-conversion-warnings", - when=lambda cfg: hasCompileFlag(cfg, "-Wcharacter-conversion"), - ), - # Tests to validate whether the compiler has a way to set the maximum number - # of steps during constant evaluation. Since the flag differs per compiler - # store the "valid" flag as a feature. 
This allows passing the proper compile - # flag to the compiler: - # // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=12345678 - # // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=12345678 - Feature( - name="has-fconstexpr-steps", - when=lambda cfg: hasCompileFlag(cfg, "-fconstexpr-steps=1"), - ), - Feature( - name="has-fconstexpr-ops-limit", - when=lambda cfg: hasCompileFlag(cfg, "-fconstexpr-ops-limit=1"), - ), - Feature(name="has-fblocks", when=lambda cfg: hasCompileFlag(cfg, "-fblocks")), - Feature( - name="fdelayed-template-parsing", - when=lambda cfg: hasCompileFlag(cfg, "-fdelayed-template-parsing"), - ), - Feature( - name="has-fobjc-arc", - when=lambda cfg: hasCompileFlag(cfg, "-xobjective-c++ -fobjc-arc") - and sys.platform.lower().strip() == "darwin", - ), # TODO: this doesn't handle cross-compiling to Apple platforms. - Feature( - name="objective-c++", - when=lambda cfg: hasCompileFlag(cfg, "-xobjective-c++ -fobjc-arc"), - ), - Feature( - name="verify-support", - when=lambda cfg: hasCompileFlag(cfg, "-Xclang -verify-ignore-unexpected"), - ), - Feature( - name="add-latomic-workaround", # https://llvm.org/PR73361 - when=lambda cfg: sourceBuilds( - cfg, "int main(int, char**) { return 0; }", ["-latomic"] - ), - actions=[AddLinkFlag("-latomic")], - ), - Feature( - name="has-64-bit-atomics", - when=lambda cfg: sourceBuilds( - cfg, - """ - #include - struct Large { char storage[64/8]; }; - std::atomic x; - int main(int, char**) { (void)x.load(); (void)x.is_lock_free(); return 0; } - """, - ), - ), - Feature( - name="has-1024-bit-atomics", - when=lambda cfg: sourceBuilds( - cfg, - """ - #include - struct Large { char storage[1024/8]; }; - std::atomic x; - int main(int, char**) { (void)x.load(); (void)x.is_lock_free(); return 0; } - """, - ), - ), - # Tests that require 64-bit architecture - Feature( - name="32-bit-pointer", - when=lambda cfg: sourceBuilds( - cfg, - """ - int main(int, char**) { - 
static_assert(sizeof(void *) == 4); - } - """, - ), - ), - # Check for a Windows UCRT bug (fixed in UCRT/Windows 10.0.20348.0): - # https://developercommunity.visualstudio.com/t/utf-8-locales-break-ctype-functions-for-wchar-type/1653678 - Feature( - name="win32-broken-utf8-wchar-ctype", - when=lambda cfg: not "_LIBCPP_HAS_LOCALIZATION" in compilerMacros(cfg) - or compilerMacros(cfg)["_LIBCPP_HAS_LOCALIZATION"] == "1" - and "_WIN32" in compilerMacros(cfg) - and not programSucceeds( - cfg, - """ - #include - #include - int main(int, char**) { - setlocale(LC_ALL, "en_US.UTF-8"); - return towlower(L'\\xDA') != L'\\xFA'; - } - """, - ), - ), - # Check for a Windows UCRT bug (fixed in UCRT/Windows 10.0.19041.0). - # https://developercommunity.visualstudio.com/t/printf-formatting-with-g-outputs-too/1660837 - Feature( - name="win32-broken-printf-g-precision", - when=lambda cfg: "_WIN32" in compilerMacros(cfg) - and not programSucceeds( - cfg, - """ - #include - #include - int main(int, char**) { - char buf[100]; - snprintf(buf, sizeof(buf), "%#.*g", 0, 0.0); - return strcmp(buf, "0."); - } - """, - ), - ), - # Check for a Windows UCRT bug (not fixed upstream yet). - # With UCRT, printf("%a", 0.0) produces "0x0.0000000000000p+0", - # while other C runtimes produce just "0x0p+0". - # https://developercommunity.visualstudio.com/t/Printf-formatting-of-float-as-hex-prints/1660844 - Feature( - name="win32-broken-printf-a-precision", - when=lambda cfg: "_WIN32" in compilerMacros(cfg) - and not programSucceeds( - cfg, - """ - #include - #include - int main(int, char**) { - char buf[100]; - snprintf(buf, sizeof(buf), "%a", 0.0); - return strcmp(buf, "0x0p+0"); - } - """, - ), - ), - # Check for Glibc < 2.27, where the ru_RU.UTF-8 locale had - # mon_decimal_point == ".", which our tests don't handle. 
- Feature( - name="glibc-old-ru_RU-decimal-point", - when=lambda cfg: not "_LIBCPP_HAS_LOCALIZATION" in compilerMacros(cfg) - or compilerMacros(cfg)["_LIBCPP_HAS_LOCALIZATION"] == "1" - and not programSucceeds( - cfg, - """ - #include - #include - int main(int, char**) { - setlocale(LC_ALL, "ru_RU.UTF-8"); - return strcmp(localeconv()->mon_decimal_point, ","); - } - """, - ), - ), - Feature( - name="has-unix-headers", - when=lambda cfg: sourceBuilds( - cfg, - """ - #include - #include - int main(int, char**) { - int fd[2]; - return pipe(fd); - } - """, - ), - ), - # Whether Bash can run on the executor. - # This is not always the case, for example when running on embedded systems. - # - # For the corner case of bash existing, but it being missing in the path - # set in %{exec} as "--env PATH=one-single-dir", the executor does find - # and executes bash, but bash then can't find any other common shell - # utilities. Test executing "bash -c 'bash --version'" to see if bash - # manages to find binaries to execute. - Feature( - name="executor-has-no-bash", - when=lambda cfg: runScriptExitCode(cfg, ["%{exec} bash -c 'bash --version'"]) != 0, - ), - # Whether module support for the platform is available. - Feature( - name="has-no-cxx-module-support", - # The libc of these platforms have functions with internal linkage. - # This is not allowed per C11 7.1.2 Standard headers/6 - # Any declaration of a library function shall have external linkage. - when=lambda cfg: "__ANDROID__" in compilerMacros(cfg) - or "__FreeBSD__" in compilerMacros(cfg) - or ("_WIN32" in compilerMacros(cfg) and not _mingwSupportsModules(cfg)) - or platform.system().lower().startswith("aix") - # Avoid building on platforms that don't support modules properly. - or not hasCompileFlag(cfg, "-Wno-reserved-module-identifier") - # older versions don't support extern "C++", newer versions don't support main in named module. 
- or not ( - sourceBuilds( - cfg, - """ - export module test; - extern "C++" int main(int, char**) { return 0; } - """, - ) - or sourceBuilds( - cfg, - """ - export module test; - int main(int, char**) { return 0; } - """, - ) - ), - ), - # The time zone validation tests compare the output of zdump against the - # output generated by 's time zone support. - Feature( - name="has-no-zdump", - when=lambda cfg: runScriptExitCode(cfg, ["zdump --version"]) != 0, - ), -] - -# Deduce and add the test features that that are implied by the #defines in -# the <__config> header. -# -# For each macro of the form `_LIBCPP_XXX_YYY_ZZZ` defined below that -# is defined after including <__config>, add a Lit feature called -# `libcpp-xxx-yyy-zzz`. When a macro is defined to a specific value -# (e.g. `_LIBCPP_ABI_VERSION=2`), the feature is `libcpp-xxx-yyy-zzz=`. -# -# Note that features that are more strongly tied to libc++ are named libcpp-foo, -# while features that are more general in nature are not prefixed with 'libcpp-'. 
-macros = { - "_LIBCPP_NO_VCRUNTIME": "libcpp-no-vcruntime", - "_LIBCPP_ABI_VERSION": "libcpp-abi-version", - "_LIBCPP_ABI_BOUNDED_ITERATORS": "libcpp-has-abi-bounded-iterators", - "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING": "libcpp-has-abi-bounded-iterators-in-string", - "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR": "libcpp-has-abi-bounded-iterators-in-vector", - "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY": "libcpp-has-abi-bounded-iterators-in-std-array", - "_LIBCPP_ABI_BOUNDED_UNIQUE_PTR": "libcpp-has-abi-bounded-unique_ptr", - "_LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE": "libcpp-has-abi-fix-unordered-container-size-type", - "_LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR": "libcpp-deprecated-abi-disable-pair-trivial-copy-ctor", - "_LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING": "libcpp-abi-no-compressed-pair-padding", - "_LIBCPP_PSTL_BACKEND_LIBDISPATCH": "libcpp-pstl-backend-libdispatch", -} -for macro, feature in macros.items(): - DEFAULT_FEATURES.append( - Feature( - name=lambda cfg, m=macro, f=feature: f + ("={}".format(compilerMacros(cfg)[m]) if compilerMacros(cfg)[m] else ""), - when=lambda cfg, m=macro: m in compilerMacros(cfg), - ) - ) - -true_false_macros = { - "_LIBCPP_HAS_THREAD_API_EXTERNAL": "libcpp-has-thread-api-external", - "_LIBCPP_HAS_THREAD_API_PTHREAD": "libcpp-has-thread-api-pthread", -} -for macro, feature in true_false_macros.items(): - DEFAULT_FEATURES.append( - Feature( - name=feature, - when=lambda cfg, m=macro: m in compilerMacros(cfg) - and compilerMacros(cfg)[m] == "1", - ) - ) - -inverted_macros = { - "_LIBCPP_HAS_TIME_ZONE_DATABASE": "no-tzdb", - "_LIBCPP_HAS_FILESYSTEM": "no-filesystem", - "_LIBCPP_HAS_LOCALIZATION": "no-localization", - "_LIBCPP_HAS_THREADS": "no-threads", - "_LIBCPP_HAS_MONOTONIC_CLOCK": "no-monotonic-clock", - "_LIBCPP_HAS_WIDE_CHARACTERS": "no-wide-characters", - "_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS": "libcpp-has-no-availability-markup", - "_LIBCPP_HAS_RANDOM_DEVICE": "no-random-device", - 
"_LIBCPP_HAS_UNICODE": "libcpp-has-no-unicode", - "_LIBCPP_HAS_TERMINAL": "no-terminal", -} -for macro, feature in inverted_macros.items(): - DEFAULT_FEATURES.append( - Feature( - name=feature, - when=lambda cfg, m=macro: m in compilerMacros(cfg) - and compilerMacros(cfg)[m] == "0", - ) - ) - -# Mapping from canonical locale names (used in the tests) to possible locale -# names on various systems. Each locale is considered supported if any of the -# alternative names is supported. -locales = { - "en_US.UTF-8": ["en_US.UTF-8", "en_US.utf8", "English_United States.1252"], - "fr_FR.UTF-8": ["fr_FR.UTF-8", "fr_FR.utf8", "French_France.1252"], - "ja_JP.UTF-8": ["ja_JP.UTF-8", "ja_JP.utf8", "Japanese_Japan.923"], - "ru_RU.UTF-8": ["ru_RU.UTF-8", "ru_RU.utf8", "Russian_Russia.1251"], - "zh_CN.UTF-8": ["zh_CN.UTF-8", "zh_CN.utf8", "Chinese_China.936"], - "fr_CA.ISO8859-1": ["fr_CA.ISO8859-1", "French_Canada.1252"], - "cs_CZ.ISO8859-2": ["cs_CZ.ISO8859-2", "Czech_Czech Republic.1250"], -} -provide_locale_conversions = { - "fr_FR.UTF-8": ["decimal_point", "mon_thousands_sep", "thousands_sep"], - "ru_RU.UTF-8": ["mon_thousands_sep"], -} -for locale, alts in locales.items(): - # Note: Using alts directly in the lambda body here will bind it to the value at the - # end of the loop. Assigning it to a default argument works around this issue. - DEFAULT_FEATURES.append( - Feature( - name="locale.{}".format(locale), - when=lambda cfg, alts=alts: hasAnyLocale(cfg, alts), - actions=lambda cfg, locale=locale, alts=alts: _getLocaleFlagsAction( - cfg, locale, alts, provide_locale_conversions[locale] - ) - if locale in provide_locale_conversions - and ("_LIBCPP_HAS_WIDE_CHARACTERS" not in compilerMacros(cfg) or - compilerMacros(cfg)["_LIBCPP_HAS_WIDE_CHARACTERS"] == "1") - else [], - ), - ) - - -# Provide environment locale conversions through substitutions to avoid platform specific -# maintenance. 
-def _getLocaleFlagsAction(cfg, locale, alts, members): - alts_list = ",".join([f'"{l}"' for l in alts]) - get_member_list = ",".join([f"lc->{m}" for m in members]) - - localeconv_info = programOutput( - cfg, - r""" - #if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS) - #define _CRT_SECURE_NO_WARNINGS - #endif - #include - #include - #include - #include - - // Print each requested locale conversion member on separate lines. - int main(int, char**) { - const char* locales[] = { %s }; - for (int loc_i = 0; loc_i < %d; ++loc_i) { - if (!setlocale(LC_ALL, locales[loc_i])) { - continue; // Choose first locale name that is recognized. - } - - lconv* lc = localeconv(); - const char* members[] = { %s }; - for (size_t m_i = 0; m_i < %d; ++m_i) { - if (!members[m_i]) { - printf("\n"); // member value is an empty string - continue; - } - - size_t len = mbstowcs(nullptr, members[m_i], 0); - if (len == static_cast(-1)) { - fprintf(stderr, "mbstowcs failed unexpectedly\n"); - return 1; - } - // Include room for null terminator. Use malloc as these features - // are also used by lit configs that don't use -lc++ (libunwind tests). - wchar_t* dst = (wchar_t*)malloc((len + 1) * sizeof(wchar_t)); - size_t ret = mbstowcs(dst, members[m_i], len + 1); - if (ret == static_cast(-1)) { - fprintf(stderr, "mbstowcs failed unexpectedly\n"); - free(dst); - return 1; - } - - for (size_t i = 0; i < len; ++i) { - if (dst[i] > 0x7F) { - printf("\\u%%04x", dst[i]); - } else { - // c++03 does not allow basic ascii-range characters in UCNs - printf("%%c", (char)dst[i]); - } - } - printf("\n"); - free(dst); - } - return 0; - } - - return 1; - } - """ - % (alts_list, len(alts), get_member_list, len(members)), - ) - valid_define_name = re.sub(r"[.-]", "_", locale).upper() - return [ - # Provide locale conversion through a substitution. 
- # Example: %{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} = L"\u202f" - AddSubstitution( - f"%{{LOCALE_CONV_{valid_define_name}_{member.upper()}}}", - lambda cfg, value=value: f"'L\"{value}\"'", - ) - for member, value in zip(members, localeconv_info.split("\n")) - ] - - -# Add features representing the target platform name: darwin, linux, windows, etc... -DEFAULT_FEATURES += [ - Feature(name="darwin", when=lambda cfg: "__APPLE__" in compilerMacros(cfg)), - Feature(name="windows", when=lambda cfg: "_WIN32" in compilerMacros(cfg)), - Feature( - name="windows-dll", - when=lambda cfg: "_WIN32" in compilerMacros(cfg) - and sourceBuilds( - cfg, - """ - #include - int main(int, char**) { return 0; } - """, - ) - and programSucceeds( - cfg, - """ - #include - #include - #include - int main(int, char**) { - // Get a pointer to a data member that gets linked from the C++ - // library. This must be a data member (functions can get - // thunk inside the calling executable), and must not be - // something that is defined inline in headers. - void *ptr = &std::cout; - // Get a handle to the current main executable. - void *exe = GetModuleHandle(NULL); - // The handle points at the PE image header. Navigate through - // the header structure to find the size of the PE image (the - // executable). - PIMAGE_DOS_HEADER dosheader = (PIMAGE_DOS_HEADER)exe; - PIMAGE_NT_HEADERS ntheader = (PIMAGE_NT_HEADERS)((BYTE *)dosheader + dosheader->e_lfanew); - PIMAGE_OPTIONAL_HEADER peheader = &ntheader->OptionalHeader; - void *exeend = (BYTE*)exe + peheader->SizeOfImage; - // Check if the tested pointer - the data symbol from the - // C++ library - is located within the exe. - if (ptr >= exe && ptr <= exeend) - return 1; - // Return success if it was outside of the executable, i.e. - // loaded from a DLL. 
- return 0; - } - """, - ), - actions=[AddCompileFlag("-DTEST_WINDOWS_DLL")], - ), - Feature(name="linux", when=lambda cfg: "__linux__" in compilerMacros(cfg)), - Feature(name="android", when=lambda cfg: "__ANDROID__" in compilerMacros(cfg)), - Feature( - name=lambda cfg: "android-device-api={}".format(_getAndroidDeviceApi(cfg)), - when=lambda cfg: "__ANDROID__" in compilerMacros(cfg), - ), - Feature( - name="LIBCXX-ANDROID-FIXME", - when=lambda cfg: "__ANDROID__" in compilerMacros(cfg), - ), - Feature(name="netbsd", when=lambda cfg: "__NetBSD__" in compilerMacros(cfg)), - Feature(name="freebsd", when=lambda cfg: "__FreeBSD__" in compilerMacros(cfg)), - Feature( - name="LIBCXX-FREEBSD-FIXME", - when=lambda cfg: "__FreeBSD__" in compilerMacros(cfg), - ), - Feature( - name="LIBCXX-PICOLIBC-FIXME", - when=lambda cfg: sourceBuilds( - cfg, - """ - #include - #ifndef __PICOLIBC__ - #error not picolibc - #endif - int main(int, char**) { return 0; } - """, - ), - ), - Feature( - name="LIBCXX-AMDGPU-FIXME", - when=lambda cfg: "__AMDGPU__" in compilerMacros(cfg), - ), - Feature( - name="LIBCXX-NVPTX-FIXME", - when=lambda cfg: "__NVPTX__" in compilerMacros(cfg), - ), - Feature( - name="can-create-symlinks", - when=lambda cfg: "_WIN32" not in compilerMacros(cfg) - or programSucceeds( - cfg, - # Creation of symlinks require elevated privileges on Windows unless - # Windows developer mode is enabled. - """ - #include - #include - int main(int, char**) { - CHAR tempDirPath[MAX_PATH]; - DWORD tempPathRet = GetTempPathA(MAX_PATH, tempDirPath); - if (tempPathRet == 0 || tempPathRet > MAX_PATH) { - return 1; - } - - CHAR tempFilePath[MAX_PATH]; - UINT uRetVal = GetTempFileNameA( - tempDirPath, - "cxx", // Prefix - 0, // Unique=0 also implies file creation. 
- tempFilePath); - if (uRetVal == 0) { - return 1; - } - - CHAR symlinkFilePath[MAX_PATH]; - int ret = sprintf_s(symlinkFilePath, MAX_PATH, "%s_symlink", tempFilePath); - if (ret == -1) { - DeleteFileA(tempFilePath); - return 1; - } - - // Requires either administrator, or developer mode enabled. - BOOL bCreatedSymlink = CreateSymbolicLinkA(symlinkFilePath, - tempFilePath, - SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE); - if (!bCreatedSymlink) { - DeleteFileA(tempFilePath); - return 1; - } - - DeleteFileA(tempFilePath); - DeleteFileA(symlinkFilePath); - return 0; - } - """, - ), - ), -] - -# Add features representing the build host platform name. -# The build host could differ from the target platform for cross-compilation. -DEFAULT_FEATURES += [ - Feature(name="buildhost={}".format(sys.platform.lower().strip())), - # sys.platform can often be represented by a "sub-system", such as 'win32', 'cygwin', 'mingw', freebsd13 & etc. - # We define a consolidated feature on a few platforms. - Feature( - name="buildhost=windows", - when=lambda cfg: platform.system().lower().startswith("windows"), - ), - Feature( - name="buildhost=freebsd", - when=lambda cfg: platform.system().lower().startswith("freebsd"), - ), - Feature( - name="buildhost=aix", - when=lambda cfg: platform.system().lower().startswith("aix"), - ), -] - -# Detect whether GDB is on the system, has Python scripting and supports -# adding breakpoint commands. If so add a substitution to access it. -def check_gdb(cfg): - gdb_path = shutil.which("gdb") - if gdb_path is None: - return False - - # Check that we can set breakpoint commands, which was added in 8.3. - # Using the quit command here means that gdb itself exits, not just - # the "python <...>" command. 
- test_src = """\ -try: - gdb.Breakpoint(\"main\").commands=\"foo\" -except AttributeError: - gdb.execute(\"quit 1\") -gdb.execute(\"quit\")""" - - try: - stdout = subprocess.check_output( - [gdb_path, "-ex", "python " + test_src, "--batch"], - stderr=subprocess.DEVNULL, - universal_newlines=True, - ) - except subprocess.CalledProcessError: - # We can't set breakpoint commands - return False - - # Check we actually ran the Python - return not "Python scripting is not supported" in stdout - - -DEFAULT_FEATURES += [ - Feature( - name="host-has-gdb-with-python", - when=check_gdb, - actions=[AddSubstitution("%{gdb}", lambda cfg: shutil.which("gdb"))], - ) -] - -# Helpers to define correspondances between LLVM versions and vendor system versions. -# Those are used for backdeployment features below, do not use directly in tests. -DEFAULT_FEATURES += [ - Feature( - name="_target-has-llvm-22", - when=lambda cfg: BooleanExpression.evaluate( - "TBD", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-21", - when=lambda cfg: BooleanExpression.evaluate( - "TBD", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-20", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-21 || target={{.+}}-apple-macosx{{26.[0-9](.\d+)?}}", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-19", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-20 || target={{.+}}-apple-macosx{{15.[4-9](.\d+)?}}", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-18", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-19 || target={{.+}}-apple-macosx{{15.[0-3](.\d+)?}}", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-17", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-18 || target={{.+}}-apple-macosx{{14.[4-9](.\d+)?}}", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-16", - when=lambda cfg: 
BooleanExpression.evaluate( - "_target-has-llvm-17 || target={{.+}}-apple-macosx{{14.[0-3](.[0-9]+)?}}", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-15", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-16 || target={{.+}}-apple-macosx{{13.[4-9](.[0-9]+)?}}", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-14", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-15", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-13", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-14 || target={{.+}}-apple-macosx{{13.[0-3](.[0-9]+)?}}", - cfg.available_features, - ), - ), - Feature( - name="_target-has-llvm-12", - when=lambda cfg: BooleanExpression.evaluate( - "_target-has-llvm-13 || target={{.+}}-apple-macosx{{12.[3-9](.[0-9]+)?}}", - cfg.available_features, - ), - ), -] - -# Define features for back-deployment testing. -# -# These features can be used to XFAIL tests that fail when deployed on (or compiled -# for) an older system. For example, if a test exhibits a bug in the libc++ on a -# particular system version, or if it uses a symbol that is not available on an -# older version of the dylib, it can be marked as XFAIL with these features. -# -# We have two families of Lit features: -# -# The first one is `using-built-library-before-llvm-XYZ`. These features encode the -# fact that the test suite is being *run* against a version of the shared/static library -# that predates LLVM version XYZ. This is useful to represent the use case of compiling -# a program against the latest libc++ but then deploying it and running it on an older -# system with an older version of the (usually shared) library. -# -# This feature is built up using the target triple passed to the compiler and the -# `stdlib=system` Lit feature, which encodes that we're running against the same library -# as described by the target triple. 
-# -# The second set of features is `availability--missing`. This family of Lit -# features encodes the presence of availability markup in the libc++ headers. This is -# useful to check that a test fails specifically when compiled for a given deployment -# target, such as when testing availability markup where we want to make sure that -# using the annotated facility on a deployment target that doesn't support it will fail -# at compile time. This can be achieved by creating a `.verify.cpp` test that checks for -# the right errors and marking the test as `REQUIRES: availability--missing`. -# -# This feature is built up using the presence of availability markup detected inside -# __config, the flavor of the library being tested and the target triple passed to the -# compiler. -# -# Note that both families of Lit features are similar but different in important ways. -# For example, tests for availability markup should be expected to produce diagnostics -# regardless of whether we're running against a system library, as long as we're using -# a libc++ flavor that enables availability markup. Similarly, a test could fail when -# run against the system library of an older version of FreeBSD, even though FreeBSD -# doesn't provide availability markup at the time of writing this. 
-for version in ("12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22"): - DEFAULT_FEATURES.append( - Feature( - name="using-built-library-before-llvm-{}".format(version), - when=lambda cfg, v=version: BooleanExpression.evaluate( - "stdlib=system && !_target-has-llvm-{}".format(v), - cfg.available_features, - ), - ) - ) - -DEFAULT_FEATURES += [ - # Tests that require https://wg21.link/P0482 support in the built library - Feature( - name="availability-char8_t_support-missing", - when=lambda cfg: BooleanExpression.evaluate( - "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-12)", - cfg.available_features, - ), - ), - # Tests that require std::to_chars(floating-point) in the built library - Feature( - name="availability-fp_to_chars-missing", - when=lambda cfg: BooleanExpression.evaluate( - "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-14)", - cfg.available_features, - ), - ), - # Tests that require __libcpp_verbose_abort support in the built library - Feature( - name="availability-verbose_abort-missing", - when=lambda cfg: BooleanExpression.evaluate( - "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-15)", - cfg.available_features, - ), - ), - # Tests that require std::pmr support in the built library - Feature( - name="availability-pmr-missing", - when=lambda cfg: BooleanExpression.evaluate( - "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-16)", - cfg.available_features, - ), - ), - # Tests that require support for and std::print in in the built library. 
- Feature( - name="availability-print-missing", - when=lambda cfg: BooleanExpression.evaluate( - "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-18)", - cfg.available_features, - ), - ), - # Tests that require time zone database support in the built library - Feature( - name="availability-tzdb-missing", - when=lambda cfg: BooleanExpression.evaluate( - "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-19)", - cfg.available_features, - ), - ), - # Tests that require std::from_chars(floating-point) in the built library - Feature( - name="availability-fp_from_chars-missing", - when=lambda cfg: BooleanExpression.evaluate( - "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-20)", - cfg.available_features, - ), - ), -] diff --git a/libcxx/utils/libcxx/test/features/__init__.py b/libcxx/utils/libcxx/test/features/__init__.py new file mode 100644 index 0000000000000..5c0d1f3aaafc6 --- /dev/null +++ b/libcxx/utils/libcxx/test/features/__init__.py @@ -0,0 +1,21 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from . import availability, compiler, gdb, libcxx_macros, localization, misc, platform + +# Lit features are evaluated in order. Some features depend on other features, so +# we are careful to define them in the correct order. For example, several features +# require the compiler detection to have been performed. 
+DEFAULT_FEATURES = [] +DEFAULT_FEATURES += compiler.features +DEFAULT_FEATURES += libcxx_macros.features +DEFAULT_FEATURES += platform.features +DEFAULT_FEATURES += localization.features +DEFAULT_FEATURES += gdb.features +DEFAULT_FEATURES += misc.features +DEFAULT_FEATURES += availability.features diff --git a/libcxx/utils/libcxx/test/features/availability.py b/libcxx/utils/libcxx/test/features/availability.py new file mode 100644 index 0000000000000..c312a7cf830ed --- /dev/null +++ b/libcxx/utils/libcxx/test/features/availability.py @@ -0,0 +1,199 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from libcxx.test.dsl import Feature +from lit.BooleanExpression import BooleanExpression + +# Helpers to define correspondences between LLVM versions and vendor system versions. +# Those are used for backdeployment features below, do not use directly in tests.
+features = [ + Feature( + name="_target-has-llvm-22", + when=lambda cfg: BooleanExpression.evaluate( + "TBD", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-21", + when=lambda cfg: BooleanExpression.evaluate( + "TBD", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-20", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-21 || target={{.+}}-apple-macosx{{26.[0-9](.[0-9]+)?}}", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-19", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-20 || target={{.+}}-apple-macosx{{15.[4-9](.[0-9]+)?}}", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-18", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-19 || target={{.+}}-apple-macosx{{15.[0-3](.[0-9]+)?}}", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-17", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-18 || target={{.+}}-apple-macosx{{14.[4-9](.[0-9]+)?}}", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-16", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-17 || target={{.+}}-apple-macosx{{14.[0-3](.[0-9]+)?}}", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-15", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-16 || target={{.+}}-apple-macosx{{13.[4-9](.[0-9]+)?}}", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-14", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-15", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-13", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-14 || target={{.+}}-apple-macosx{{13.[0-3](.[0-9]+)?}}", + cfg.available_features, + ), + ), + Feature( + name="_target-has-llvm-12", + when=lambda cfg: BooleanExpression.evaluate( + "_target-has-llvm-13 || target={{.+}}-apple-macosx{{12.[3-9](.[0-9]+)?}}", +
cfg.available_features, + ), + ), +] + +# Define features for back-deployment testing. +# +# These features can be used to XFAIL tests that fail when deployed on (or compiled +# for) an older system. For example, if a test exhibits a bug in the libc++ on a +# particular system version, or if it uses a symbol that is not available on an +# older version of the dylib, it can be marked as XFAIL with these features. +# +# We have two families of Lit features: +# +# The first one is `using-built-library-before-llvm-XYZ`. These features encode the +# fact that the test suite is being *run* against a version of the shared/static library +# that predates LLVM version XYZ. This is useful to represent the use case of compiling +# a program against the latest libc++ but then deploying it and running it on an older +# system with an older version of the (usually shared) library. +# +# This feature is built up using the target triple passed to the compiler and the +# `stdlib=system` Lit feature, which encodes that we're running against the same library +# as described by the target triple. +# +# The second set of features is `availability-<FEATURE>-missing`. This family of Lit +# features encodes the presence of availability markup in the libc++ headers. This is +# useful to check that a test fails specifically when compiled for a given deployment +# target, such as when testing availability markup where we want to make sure that +# using the annotated facility on a deployment target that doesn't support it will fail +# at compile time. This can be achieved by creating a `.verify.cpp` test that checks for +# the right errors and marking the test as `REQUIRES: availability-<FEATURE>-missing`. +# +# This feature is built up using the presence of availability markup detected inside +# __config, the flavor of the library being tested and the target triple passed to the +# compiler. +# +# Note that both families of Lit features are similar but different in important ways.
+# For example, tests for availability markup should be expected to produce diagnostics +# regardless of whether we're running against a system library, as long as we're using +# a libc++ flavor that enables availability markup. Similarly, a test could fail when +# run against the system library of an older version of FreeBSD, even though FreeBSD +# doesn't provide availability markup at the time of writing this. +for version in ("12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22"): + features.append( + Feature( + name="using-built-library-before-llvm-{}".format(version), + when=lambda cfg, v=version: BooleanExpression.evaluate( + "stdlib=system && !_target-has-llvm-{}".format(v), + cfg.available_features, + ), + ) + ) + +features += [ + # Tests that require https://wg21.link/P0482 support in the built library + Feature( + name="availability-char8_t_support-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-12)", + cfg.available_features, + ), + ), + # Tests that require std::to_chars(floating-point) in the built library + Feature( + name="availability-fp_to_chars-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-14)", + cfg.available_features, + ), + ), + # Tests that require __libcpp_verbose_abort support in the built library + Feature( + name="availability-verbose_abort-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-15)", + cfg.available_features, + ), + ), + # Tests that require std::pmr support in the built library + Feature( + name="availability-pmr-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-16)", + cfg.available_features, + ), + ), + # Tests that require support for <print> and std::print in <ostream>
in the built library. + Feature( + name="availability-print-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-18)", + cfg.available_features, + ), + ), + # Tests that require time zone database support in the built library + Feature( + name="availability-tzdb-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-19)", + cfg.available_features, + ), + ), + # Tests that require std::from_chars(floating-point) in the built library + Feature( + name="availability-fp_from_chars-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-20)", + cfg.available_features, + ), + ), +] diff --git a/libcxx/utils/libcxx/test/features/compiler.py b/libcxx/utils/libcxx/test/features/compiler.py new file mode 100644 index 0000000000000..2fb2d4b1502ad --- /dev/null +++ b/libcxx/utils/libcxx/test/features/compiler.py @@ -0,0 +1,82 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from libcxx.test.dsl import compilerMacros, Feature, AddCompileFlag, AddFeature + +_isAnyClang = lambda cfg: "__clang__" in compilerMacros(cfg) +_isAppleClang = lambda cfg: "__apple_build_version__" in compilerMacros(cfg) +_isAnyGCC = lambda cfg: "__GNUC__" in compilerMacros(cfg) +_isClang = lambda cfg: _isAnyClang(cfg) and not _isAppleClang(cfg) +_isGCC = lambda cfg: _isAnyGCC(cfg) and not _isAnyClang(cfg) +_isAnyClangOrGCC = lambda cfg: _isAnyClang(cfg) or _isAnyGCC(cfg) +_isClExe = lambda cfg: not _isAnyClangOrGCC(cfg) +_isMSVC = lambda cfg: "_MSC_VER" in compilerMacros(cfg) +_msvcVersion = lambda cfg: (int(compilerMacros(cfg)["_MSC_VER"]) // 100, int(compilerMacros(cfg)["_MSC_VER"]) % 100) + +features = [ + # gcc-style-warnings detects compilers that understand -Wno-meow flags, unlike MSVC's compiler driver cl.exe. + Feature(name="gcc-style-warnings", when=_isAnyClangOrGCC), + Feature(name="cl-style-warnings", when=_isClExe), + + Feature(name="apple-clang", when=_isAppleClang), + Feature( + name=lambda cfg: "apple-clang-{__clang_major__}".format(**compilerMacros(cfg)), + when=_isAppleClang, + ), + Feature( + name=lambda cfg: "apple-clang-{__clang_major__}.{__clang_minor__}".format(**compilerMacros(cfg)), + when=_isAppleClang, + ), + Feature( + name=lambda cfg: "apple-clang-{__clang_major__}.{__clang_minor__}.{__clang_patchlevel__}".format(**compilerMacros(cfg)), + when=_isAppleClang, + ), + Feature(name="clang", when=_isClang), + Feature( + name=lambda cfg: "clang-{__clang_major__}".format(**compilerMacros(cfg)), + when=_isClang, + ), + Feature( + name=lambda cfg: "clang-{__clang_major__}.{__clang_minor__}".format(**compilerMacros(cfg)), + when=_isClang, + ), + Feature( + name=lambda cfg: "clang-{__clang_major__}.{__clang_minor__}.{__clang_patchlevel__}".format(**compilerMacros(cfg)), + when=_isClang, + ), + # Note: 
Due to a GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104760), we must disable deprecation warnings + # on GCC or spurious diagnostics are issued. + # + # TODO: + # - Enable -Wplacement-new with GCC. + # - Enable -Wclass-memaccess with GCC. + Feature( + name="gcc", + when=_isGCC, + actions=[ + AddCompileFlag("-D_LIBCPP_DISABLE_DEPRECATION_WARNINGS"), + AddCompileFlag("-Wno-placement-new"), + AddCompileFlag("-Wno-class-memaccess"), + AddFeature("GCC-ALWAYS_INLINE-FIXME"), + ], + ), + Feature( + name=lambda cfg: "gcc-{__GNUC__}".format(**compilerMacros(cfg)), when=_isGCC + ), + Feature( + name=lambda cfg: "gcc-{__GNUC__}.{__GNUC_MINOR__}".format(**compilerMacros(cfg)), + when=_isGCC, + ), + Feature( + name=lambda cfg: "gcc-{__GNUC__}.{__GNUC_MINOR__}.{__GNUC_PATCHLEVEL__}".format(**compilerMacros(cfg)), + when=_isGCC, + ), + Feature(name="msvc", when=_isMSVC), + Feature(name=lambda cfg: "msvc-{}".format(*_msvcVersion(cfg)), when=_isMSVC), + Feature(name=lambda cfg: "msvc-{}.{}".format(*_msvcVersion(cfg)), when=_isMSVC), +] diff --git a/libcxx/utils/libcxx/test/features/gdb.py b/libcxx/utils/libcxx/test/features/gdb.py new file mode 100644 index 0000000000000..459a59afc32f4 --- /dev/null +++ b/libcxx/utils/libcxx/test/features/gdb.py @@ -0,0 +1,50 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from libcxx.test.dsl import Feature, AddSubstitution +import shutil +import subprocess + +# Detect whether GDB is on the system, has Python scripting and supports +# adding breakpoint commands. If so add a substitution to access it. 
+def check_gdb(cfg): + gdb_path = shutil.which("gdb") + if gdb_path is None: + return False + + # Check that we can set breakpoint commands, which was added in 8.3. + # Using the quit command here means that gdb itself exits, not just + # the "python <...>" command. + test_src = """\ +try: + gdb.Breakpoint(\"main\").commands=\"foo\" +except AttributeError: + gdb.execute(\"quit 1\") +gdb.execute(\"quit\")""" + + try: + stdout = subprocess.check_output( + [gdb_path, "-ex", "python " + test_src, "--batch"], + stderr=subprocess.DEVNULL, + universal_newlines=True, + ) + except subprocess.CalledProcessError: + # We can't set breakpoint commands + return False + + # Check we actually ran the Python + return not "Python scripting is not supported" in stdout + + +features = [ + Feature( + name="host-has-gdb-with-python", + when=check_gdb, + actions=[AddSubstitution("%{gdb}", lambda cfg: shutil.which("gdb"))], + ) +] diff --git a/libcxx/utils/libcxx/test/features/libcxx_macros.py b/libcxx/utils/libcxx/test/features/libcxx_macros.py new file mode 100644 index 0000000000000..7a465f2e87866 --- /dev/null +++ b/libcxx/utils/libcxx/test/features/libcxx_macros.py @@ -0,0 +1,76 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from libcxx.test.dsl import Feature, compilerMacros + +features = [] + +# Deduce and add the test features that are implied by the #defines in +# the <__config> header. +# +# For each macro of the form `_LIBCPP_XXX_YYY_ZZZ` defined below that +# is defined after including <__config>, add a Lit feature called +# `libcpp-xxx-yyy-zzz`. When a macro is defined to a specific value +# (e.g.
`_LIBCPP_ABI_VERSION=2`), the feature is `libcpp-xxx-yyy-zzz=<value>`. +# +# Note that features that are more strongly tied to libc++ are named libcpp-foo, +# while features that are more general in nature are not prefixed with 'libcpp-'. +macros = { + "_LIBCPP_NO_VCRUNTIME": "libcpp-no-vcruntime", + "_LIBCPP_ABI_VERSION": "libcpp-abi-version", + "_LIBCPP_ABI_BOUNDED_ITERATORS": "libcpp-has-abi-bounded-iterators", + "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING": "libcpp-has-abi-bounded-iterators-in-string", + "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR": "libcpp-has-abi-bounded-iterators-in-vector", + "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY": "libcpp-has-abi-bounded-iterators-in-std-array", + "_LIBCPP_ABI_BOUNDED_UNIQUE_PTR": "libcpp-has-abi-bounded-unique_ptr", + "_LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE": "libcpp-has-abi-fix-unordered-container-size-type", + "_LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR": "libcpp-deprecated-abi-disable-pair-trivial-copy-ctor", + "_LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING": "libcpp-abi-no-compressed-pair-padding", + "_LIBCPP_PSTL_BACKEND_LIBDISPATCH": "libcpp-pstl-backend-libdispatch", +} +for macro, feature in macros.items(): + features.append( + Feature( + name=lambda cfg, m=macro, f=feature: f + ("={}".format(compilerMacros(cfg)[m]) if compilerMacros(cfg)[m] else ""), + when=lambda cfg, m=macro: m in compilerMacros(cfg), + ) + ) + +true_false_macros = { + "_LIBCPP_HAS_THREAD_API_EXTERNAL": "libcpp-has-thread-api-external", + "_LIBCPP_HAS_THREAD_API_PTHREAD": "libcpp-has-thread-api-pthread", +} +for macro, feature in true_false_macros.items(): + features.append( + Feature( + name=feature, + when=lambda cfg, m=macro: m in compilerMacros(cfg) + and compilerMacros(cfg)[m] == "1", + ) + ) + +inverted_macros = { + "_LIBCPP_HAS_TIME_ZONE_DATABASE": "no-tzdb", + "_LIBCPP_HAS_FILESYSTEM": "no-filesystem", + "_LIBCPP_HAS_LOCALIZATION": "no-localization", + "_LIBCPP_HAS_THREADS": "no-threads", + "_LIBCPP_HAS_MONOTONIC_CLOCK":
"no-monotonic-clock", + "_LIBCPP_HAS_WIDE_CHARACTERS": "no-wide-characters", + "_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS": "libcpp-has-no-availability-markup", + "_LIBCPP_HAS_RANDOM_DEVICE": "no-random-device", + "_LIBCPP_HAS_UNICODE": "libcpp-has-no-unicode", + "_LIBCPP_HAS_TERMINAL": "no-terminal", +} +for macro, feature in inverted_macros.items(): + features.append( + Feature( + name=feature, + when=lambda cfg, m=macro: m in compilerMacros(cfg) + and compilerMacros(cfg)[m] == "0", + ) + ) diff --git a/libcxx/utils/libcxx/test/features/localization.py b/libcxx/utils/libcxx/test/features/localization.py new file mode 100644 index 0000000000000..157c250429d27 --- /dev/null +++ b/libcxx/utils/libcxx/test/features/localization.py @@ -0,0 +1,142 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from libcxx.test.dsl import compilerMacros, Feature, programSucceeds, hasAnyLocale, programOutput, AddSubstitution +import re + +features = [ + # Check for Glibc < 2.27, where the ru_RU.UTF-8 locale had + # mon_decimal_point == ".", which our tests don't handle. + Feature( + name="glibc-old-ru_RU-decimal-point", + when=lambda cfg: not "_LIBCPP_HAS_LOCALIZATION" in compilerMacros(cfg) + or compilerMacros(cfg)["_LIBCPP_HAS_LOCALIZATION"] == "1" + and not programSucceeds( + cfg, + """ + #include + #include + int main(int, char**) { + setlocale(LC_ALL, "ru_RU.UTF-8"); + return strcmp(localeconv()->mon_decimal_point, ","); + } + """, + ), + ), +] + +# Mapping from canonical locale names (used in the tests) to possible locale +# names on various systems. Each locale is considered supported if any of the +# alternative names is supported. 
+_locales = { + "en_US.UTF-8": ["en_US.UTF-8", "en_US.utf8", "English_United States.1252"], + "fr_FR.UTF-8": ["fr_FR.UTF-8", "fr_FR.utf8", "French_France.1252"], + "ja_JP.UTF-8": ["ja_JP.UTF-8", "ja_JP.utf8", "Japanese_Japan.923"], + "ru_RU.UTF-8": ["ru_RU.UTF-8", "ru_RU.utf8", "Russian_Russia.1251"], + "zh_CN.UTF-8": ["zh_CN.UTF-8", "zh_CN.utf8", "Chinese_China.936"], + "fr_CA.ISO8859-1": ["fr_CA.ISO8859-1", "French_Canada.1252"], + "cs_CZ.ISO8859-2": ["cs_CZ.ISO8859-2", "Czech_Czech Republic.1250"], +} +_provide_locale_conversions = { + "fr_FR.UTF-8": ["decimal_point", "mon_thousands_sep", "thousands_sep"], + "ru_RU.UTF-8": ["mon_thousands_sep"], +} +for locale, alts in _locales.items(): + # Note: Using alts directly in the lambda body here will bind it to the value at the + # end of the loop. Assigning it to a default argument works around this issue. + features.append( + Feature( + name="locale.{}".format(locale), + when=lambda cfg, alts=alts: hasAnyLocale(cfg, alts), + actions=lambda cfg, locale=locale, alts=alts: _getLocaleFlagsAction( + cfg, locale, alts, _provide_locale_conversions[locale] + ) + if locale in _provide_locale_conversions + and ("_LIBCPP_HAS_WIDE_CHARACTERS" not in compilerMacros(cfg) or + compilerMacros(cfg)["_LIBCPP_HAS_WIDE_CHARACTERS"] == "1") + else [], + ), + ) + +# Provide environment locale conversions through substitutions to avoid platform specific +# maintenance. +def _getLocaleFlagsAction(cfg, locale, alts, members): + alts_list = ",".join([f'"{l}"' for l in alts]) + get_member_list = ",".join([f"lc->{m}" for m in members]) + + localeconv_info = programOutput( + cfg, + r""" + #if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS) + #define _CRT_SECURE_NO_WARNINGS + #endif + #include + #include + #include + #include + + // Print each requested locale conversion member on separate lines. 
+ int main(int, char**) { + const char* locales[] = { %s }; + for (int loc_i = 0; loc_i < %d; ++loc_i) { + if (!setlocale(LC_ALL, locales[loc_i])) { + continue; // Choose first locale name that is recognized. + } + + lconv* lc = localeconv(); + const char* members[] = { %s }; + for (size_t m_i = 0; m_i < %d; ++m_i) { + if (!members[m_i]) { + printf("\n"); // member value is an empty string + continue; + } + + size_t len = mbstowcs(nullptr, members[m_i], 0); + if (len == static_cast(-1)) { + fprintf(stderr, "mbstowcs failed unexpectedly\n"); + return 1; + } + // Include room for null terminator. Use malloc as these features + // are also used by lit configs that don't use -lc++ (libunwind tests). + wchar_t* dst = (wchar_t*)malloc((len + 1) * sizeof(wchar_t)); + size_t ret = mbstowcs(dst, members[m_i], len + 1); + if (ret == static_cast(-1)) { + fprintf(stderr, "mbstowcs failed unexpectedly\n"); + free(dst); + return 1; + } + + for (size_t i = 0; i < len; ++i) { + if (dst[i] > 0x7F) { + printf("\\u%%04x", dst[i]); + } else { + // c++03 does not allow basic ascii-range characters in UCNs + printf("%%c", (char)dst[i]); + } + } + printf("\n"); + free(dst); + } + return 0; + } + + return 1; + } + """ + % (alts_list, len(alts), get_member_list, len(members)), + ) + valid_define_name = re.sub(r"[.-]", "_", locale).upper() + return [ + # Provide locale conversion through a substitution. 
+ # Example: %{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} = L"\u202f" + AddSubstitution( + f"%{{LOCALE_CONV_{valid_define_name}_{member.upper()}}}", + lambda cfg, value=value: f"'L\"{value}\"'", + ) + for member, value in zip(members, localeconv_info.split("\n")) + ] diff --git a/libcxx/utils/libcxx/test/features/misc.py b/libcxx/utils/libcxx/test/features/misc.py new file mode 100644 index 0000000000000..738e3d8bb207c --- /dev/null +++ b/libcxx/utils/libcxx/test/features/misc.py @@ -0,0 +1,299 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from libcxx.test.dsl import compilerMacros, sourceBuilds, hasCompileFlag, programSucceeds, runScriptExitCode +from libcxx.test.dsl import Feature, AddCompileFlag, AddLinkFlag +import platform +import sys + +def _mingwSupportsModules(cfg): + # Only mingw headers are known to work with libc++ built as a module, + # at the moment. + if not "__MINGW32__" in compilerMacros(cfg): + return False + # For mingw headers, check for a version known to support being built + # as a module. + return sourceBuilds( + cfg, + """ + #include <_mingw_mac.h> + #if __MINGW64_VERSION_MAJOR < 12 + #error Headers known to be incompatible + #elif __MINGW64_VERSION_MAJOR == 12 + // The headers were fixed to work with libc++ modules during + // __MINGW64_VERSION_MAJOR == 12. The headers became compatible + // with libc++ built as a module in + // 1652e9241b5d8a5a779c6582b1c3c4f4a7cc66e5 (Apr 2024), but the + // following commit 8c13b28ace68f2c0094d45121d59a4b951b533ed + // removed the now unused __mingw_static_ovr define. Use this + // as indicator for whether we've got new enough headers. 
+ #ifdef __mingw_static_ovr + #error Headers too old + #endif + #else + // __MINGW64_VERSION_MAJOR > 12 should be ok. + #endif + int main(int, char**) { return 0; } + """, + ) + +features = [ + Feature( + name="diagnose-if-support", + when=lambda cfg: hasCompileFlag(cfg, "-Wuser-defined-warnings"), + actions=[AddCompileFlag("-Wuser-defined-warnings")], + ), + Feature( + name="character-conversion-warnings", + when=lambda cfg: hasCompileFlag(cfg, "-Wcharacter-conversion"), + ), + # Tests to validate whether the compiler has a way to set the maximum number + # of steps during constant evaluation. Since the flag differs per compiler + # store the "valid" flag as a feature. This allows passing the proper compile + # flag to the compiler: + # // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=12345678 + # // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=12345678 + Feature( + name="has-fconstexpr-steps", + when=lambda cfg: hasCompileFlag(cfg, "-fconstexpr-steps=1"), + ), + Feature( + name="has-fconstexpr-ops-limit", + when=lambda cfg: hasCompileFlag(cfg, "-fconstexpr-ops-limit=1"), + ), + Feature(name="has-fblocks", when=lambda cfg: hasCompileFlag(cfg, "-fblocks")), + Feature( + name="fdelayed-template-parsing", + when=lambda cfg: hasCompileFlag(cfg, "-fdelayed-template-parsing"), + ), + Feature( + name="has-fobjc-arc", + when=lambda cfg: hasCompileFlag(cfg, "-xobjective-c++ -fobjc-arc") + and sys.platform.lower().strip() == "darwin", + ), # TODO: this doesn't handle cross-compiling to Apple platforms. 
+ Feature( + name="objective-c++", + when=lambda cfg: hasCompileFlag(cfg, "-xobjective-c++ -fobjc-arc"), + ), + Feature( + name="verify-support", + when=lambda cfg: hasCompileFlag(cfg, "-Xclang -verify-ignore-unexpected"), + ), + Feature( + name="add-latomic-workaround", # https://llvm.org/PR73361 + when=lambda cfg: sourceBuilds( + cfg, "int main(int, char**) { return 0; }", ["-latomic"] + ), + actions=[AddLinkFlag("-latomic")], + ), + Feature( + name="has-64-bit-atomics", + when=lambda cfg: sourceBuilds( + cfg, + """ + #include + struct Large { char storage[64/8]; }; + std::atomic x; + int main(int, char**) { (void)x.load(); (void)x.is_lock_free(); return 0; } + """, + ), + ), + Feature( + name="has-1024-bit-atomics", + when=lambda cfg: sourceBuilds( + cfg, + """ + #include + struct Large { char storage[1024/8]; }; + std::atomic x; + int main(int, char**) { (void)x.load(); (void)x.is_lock_free(); return 0; } + """, + ), + ), + # Tests that require 64-bit architecture + Feature( + name="32-bit-pointer", + when=lambda cfg: sourceBuilds( + cfg, + """ + int main(int, char**) { + static_assert(sizeof(void *) == 4); + } + """, + ), + ), + # Check for a Windows UCRT bug (fixed in UCRT/Windows 10.0.20348.0): + # https://developercommunity.visualstudio.com/t/utf-8-locales-break-ctype-functions-for-wchar-type/1653678 + Feature( + name="win32-broken-utf8-wchar-ctype", + when=lambda cfg: not "_LIBCPP_HAS_LOCALIZATION" in compilerMacros(cfg) + or compilerMacros(cfg)["_LIBCPP_HAS_LOCALIZATION"] == "1" + and "_WIN32" in compilerMacros(cfg) + and not programSucceeds( + cfg, + """ + #include + #include + int main(int, char**) { + setlocale(LC_ALL, "en_US.UTF-8"); + return towlower(L'\\xDA') != L'\\xFA'; + } + """, + ), + ), + # Check for a Windows UCRT bug (fixed in UCRT/Windows 10.0.19041.0). 
+ # https://developercommunity.visualstudio.com/t/printf-formatting-with-g-outputs-too/1660837 + Feature( + name="win32-broken-printf-g-precision", + when=lambda cfg: "_WIN32" in compilerMacros(cfg) + and not programSucceeds( + cfg, + """ + #include + #include + int main(int, char**) { + char buf[100]; + snprintf(buf, sizeof(buf), "%#.*g", 0, 0.0); + return strcmp(buf, "0."); + } + """, + ), + ), + # Check for a Windows UCRT bug (not fixed upstream yet). + # With UCRT, printf("%a", 0.0) produces "0x0.0000000000000p+0", + # while other C runtimes produce just "0x0p+0". + # https://developercommunity.visualstudio.com/t/Printf-formatting-of-float-as-hex-prints/1660844 + Feature( + name="win32-broken-printf-a-precision", + when=lambda cfg: "_WIN32" in compilerMacros(cfg) + and not programSucceeds( + cfg, + """ + #include + #include + int main(int, char**) { + char buf[100]; + snprintf(buf, sizeof(buf), "%a", 0.0); + return strcmp(buf, "0x0p+0"); + } + """, + ), + ), + Feature( + name="has-unix-headers", + when=lambda cfg: sourceBuilds( + cfg, + """ + #include + #include + int main(int, char**) { + int fd[2]; + return pipe(fd); + } + """, + ), + ), + # Whether Bash can run on the executor. + # This is not always the case, for example when running on embedded systems. + # + # For the corner case of bash existing, but it being missing in the path + # set in %{exec} as "--env PATH=one-single-dir", the executor does find + # and executes bash, but bash then can't find any other common shell + # utilities. Test executing "bash -c 'bash --version'" to see if bash + # manages to find binaries to execute. + Feature( + name="executor-has-no-bash", + when=lambda cfg: runScriptExitCode(cfg, ["%{exec} bash -c 'bash --version'"]) != 0, + ), + # Whether module support for the platform is available. + Feature( + name="has-no-cxx-module-support", + # The libc of these platforms have functions with internal linkage. 
+ # This is not allowed per C11 7.1.2 Standard headers/6 + # Any declaration of a library function shall have external linkage. + when=lambda cfg: "__ANDROID__" in compilerMacros(cfg) + or "__FreeBSD__" in compilerMacros(cfg) + or ("_WIN32" in compilerMacros(cfg) and not _mingwSupportsModules(cfg)) + or platform.system().lower().startswith("aix") + # Avoid building on platforms that don't support modules properly. + or not hasCompileFlag(cfg, "-Wno-reserved-module-identifier") + # older versions don't support extern "C++", newer versions don't support main in named module. + or not ( + sourceBuilds( + cfg, + """ + export module test; + extern "C++" int main(int, char**) { return 0; } + """, + ) + or sourceBuilds( + cfg, + """ + export module test; + int main(int, char**) { return 0; } + """, + ) + ), + ), + # The time zone validation tests compare the output of zdump against the + # output generated by <chrono>'s time zone support. + Feature( + name="has-no-zdump", + when=lambda cfg: runScriptExitCode(cfg, ["zdump --version"]) != 0, + ), + Feature( + name="can-create-symlinks", + when=lambda cfg: "_WIN32" not in compilerMacros(cfg) + or programSucceeds( + cfg, + # Creation of symlinks require elevated privileges on Windows unless + # Windows developer mode is enabled. + """ + #include + #include + int main(int, char**) { + CHAR tempDirPath[MAX_PATH]; + DWORD tempPathRet = GetTempPathA(MAX_PATH, tempDirPath); + if (tempPathRet == 0 || tempPathRet > MAX_PATH) { + return 1; + } + + CHAR tempFilePath[MAX_PATH]; + UINT uRetVal = GetTempFileNameA( + tempDirPath, + "cxx", // Prefix + 0, // Unique=0 also implies file creation. + tempFilePath); + if (uRetVal == 0) { + return 1; + } + + CHAR symlinkFilePath[MAX_PATH]; + int ret = sprintf_s(symlinkFilePath, MAX_PATH, "%s_symlink", tempFilePath); + if (ret == -1) { + DeleteFileA(tempFilePath); + return 1; + } + + // Requires either administrator, or developer mode enabled.
+ BOOL bCreatedSymlink = CreateSymbolicLinkA(symlinkFilePath, + tempFilePath, + SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE); + if (!bCreatedSymlink) { + DeleteFileA(tempFilePath); + return 1; + } + + DeleteFileA(tempFilePath); + DeleteFileA(symlinkFilePath); + return 0; + } + """, + ), + ), +] diff --git a/libcxx/utils/libcxx/test/features/platform.py b/libcxx/utils/libcxx/test/features/platform.py new file mode 100644 index 0000000000000..db9d3931da7ff --- /dev/null +++ b/libcxx/utils/libcxx/test/features/platform.py @@ -0,0 +1,132 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from libcxx.test.dsl import programOutput, Feature, compilerMacros, programSucceeds, AddCompileFlag, sourceBuilds +import platform +import sys + +def _getAndroidDeviceApi(cfg): + return int( + programOutput( + cfg, + r""" + #include + #include + int main(int, char**) { + printf("%d\n", android_get_device_api_level()); + return 0; + } + """, + ) + ) + +# Add features representing the target platform name: darwin, linux, windows, etc... +features = [ + Feature(name="darwin", when=lambda cfg: "__APPLE__" in compilerMacros(cfg)), + Feature(name="windows", when=lambda cfg: "_WIN32" in compilerMacros(cfg)), + Feature( + name="windows-dll", + when=lambda cfg: "_WIN32" in compilerMacros(cfg) + and sourceBuilds( + cfg, + """ + #include + int main(int, char**) { return 0; } + """, + ) + and programSucceeds( + cfg, + """ + #include + #include + #include + int main(int, char**) { + // Get a pointer to a data member that gets linked from the C++ + // library. 
This must be a data member (functions can get + // thunk inside the calling executable), and must not be + // something that is defined inline in headers. + void *ptr = &std::cout; + // Get a handle to the current main executable. + void *exe = GetModuleHandle(NULL); + // The handle points at the PE image header. Navigate through + // the header structure to find the size of the PE image (the + // executable). + PIMAGE_DOS_HEADER dosheader = (PIMAGE_DOS_HEADER)exe; + PIMAGE_NT_HEADERS ntheader = (PIMAGE_NT_HEADERS)((BYTE *)dosheader + dosheader->e_lfanew); + PIMAGE_OPTIONAL_HEADER peheader = &ntheader->OptionalHeader; + void *exeend = (BYTE*)exe + peheader->SizeOfImage; + // Check if the tested pointer - the data symbol from the + // C++ library - is located within the exe. + if (ptr >= exe && ptr <= exeend) + return 1; + // Return success if it was outside of the executable, i.e. + // loaded from a DLL. + return 0; + } + """, + ), + actions=[AddCompileFlag("-DTEST_WINDOWS_DLL")], + ), + Feature(name="linux", when=lambda cfg: "__linux__" in compilerMacros(cfg)), + Feature(name="android", when=lambda cfg: "__ANDROID__" in compilerMacros(cfg)), + Feature( + name=lambda cfg: "android-device-api={}".format(_getAndroidDeviceApi(cfg)), + when=lambda cfg: "__ANDROID__" in compilerMacros(cfg), + ), + Feature( + name="LIBCXX-ANDROID-FIXME", + when=lambda cfg: "__ANDROID__" in compilerMacros(cfg), + ), + Feature(name="netbsd", when=lambda cfg: "__NetBSD__" in compilerMacros(cfg)), + Feature(name="freebsd", when=lambda cfg: "__FreeBSD__" in compilerMacros(cfg)), + Feature( + name="LIBCXX-FREEBSD-FIXME", + when=lambda cfg: "__FreeBSD__" in compilerMacros(cfg), + ), + Feature( + name="LIBCXX-PICOLIBC-FIXME", + when=lambda cfg: sourceBuilds( + cfg, + """ + #include + #ifndef __PICOLIBC__ + #error not picolibc + #endif + int main(int, char**) { return 0; } + """, + ), + ), + Feature( + name="LIBCXX-AMDGPU-FIXME", + when=lambda cfg: "__AMDGPU__" in compilerMacros(cfg), + ), + 
Feature( + name="LIBCXX-NVPTX-FIXME", + when=lambda cfg: "__NVPTX__" in compilerMacros(cfg), + ), +] + +# Add features representing the build host platform name. +# The build host could differ from the target platform for cross-compilation. +features += [ + Feature(name="buildhost={}".format(sys.platform.lower().strip())), + # sys.platform can often be represented by a "sub-system", such as 'win32', 'cygwin', 'mingw', freebsd13 & etc. + # We define a consolidated feature on a few platforms. + Feature( + name="buildhost=windows", + when=lambda cfg: platform.system().lower().startswith("windows"), + ), + Feature( + name="buildhost=freebsd", + when=lambda cfg: platform.system().lower().startswith("freebsd"), + ), + Feature( + name="buildhost=aix", + when=lambda cfg: platform.system().lower().startswith("aix"), + ), +] diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index c02d6df1c47a4..299aa28777fd5 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -11,7 +11,7 @@ from pathlib import Path from libcxx.test.dsl import * -from libcxx.test.features import _isClang, _isAppleClang, _isGCC, _isMSVC +from libcxx.test.features.compiler import _isClang, _isAppleClang, _isGCC, _isMSVC _warningFlags = [ diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp index 5c1b7d4943b3f..2957cb716041d 100644 --- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp +++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp @@ -1328,32 +1328,36 @@ class Executor { m_emu, inst, 8, ZextD, [](uint64_t a, uint64_t b) { return std::max(a, b); }); } - template - bool F_Load(T inst, const fltSemantics &(*semantics)(), - unsigned int numBits) { + template + bool F_Load(I inst, const fltSemantics &(*semantics)()) { return transformOptional(inst.rs1.Read(m_emu), [&](auto &&rs1) { - uint64_t addr = rs1 + 
uint64_t(inst.imm); - uint64_t bits = *m_emu.ReadMem(addr); + uint64_t addr = + rs1 + uint64_t(SignExt(inst.imm)); + uint64_t bits = *m_emu.ReadMem(addr); + unsigned numBits = sizeof(T) * 8; APFloat f(semantics(), APInt(numBits, bits)); return inst.rd.WriteAPFloat(m_emu, f); }) .value_or(false); } - bool operator()(FLW inst) { return F_Load(inst, &APFloat::IEEEsingle, 32); } - template bool F_Store(T inst, bool isDouble) { + bool operator()(FLW inst) { + return F_Load(inst, &APFloat::IEEEsingle); + } + template bool F_Store(I inst, bool isDouble) { return transformOptional(zipOpt(inst.rs1.Read(m_emu), inst.rs2.ReadAPFloat(m_emu, isDouble)), [&](auto &&tup) { auto [rs1, rs2] = tup; - uint64_t addr = rs1 + uint64_t(inst.imm); + uint64_t addr = + rs1 + uint64_t(SignExt(inst.imm)); uint64_t bits = rs2.bitcastToAPInt().getZExtValue(); - return m_emu.WriteMem(addr, bits); + return m_emu.WriteMem(addr, bits); }) .value_or(false); } - bool operator()(FSW inst) { return F_Store(inst, false); } + bool operator()(FSW inst) { return F_Store(inst, false); } std::tuple FusedMultiplyAdd(APFloat rs1, APFloat rs2, APFloat rs3) { auto opStatus = rs1.fusedMultiplyAdd(rs2, rs3, m_emu.GetRoundingMode()); @@ -1616,8 +1620,10 @@ class Executor { bool operator()(FCVT_S_LU inst) { return FCVT_f2i(inst, &Rs::Read, APFloat::IEEEsingle()); } - bool operator()(FLD inst) { return F_Load(inst, &APFloat::IEEEdouble, 64); } - bool operator()(FSD inst) { return F_Store(inst, true); } + bool operator()(FLD inst) { + return F_Load(inst, &APFloat::IEEEdouble); + } + bool operator()(FSD inst) { return F_Store(inst, true); } bool operator()(FMADD_D inst) { return FMA(inst, true, 1.0f, 1.0f); } bool operator()(FMSUB_D inst) { return FMA(inst, true, 1.0f, -1.0f); } bool operator()(FNMSUB_D inst) { return FMA(inst, true, -1.0f, 1.0f); } diff --git a/lldb/source/Plugins/Platform/Android/PlatformAndroid.cpp b/lldb/source/Plugins/Platform/Android/PlatformAndroid.cpp index 57d88f615e2b3..22b9711fda480 100644 
--- a/lldb/source/Plugins/Platform/Android/PlatformAndroid.cpp +++ b/lldb/source/Plugins/Platform/Android/PlatformAndroid.cpp @@ -15,6 +15,8 @@ #include "lldb/Utility/UriParser.h" #include "lldb/ValueObject/ValueObject.h" +#include "llvm/ADT/DenseMap.h" + #include "AdbClient.h" #include "PlatformAndroid.h" #include "PlatformAndroidRemoteGDBServer.h" @@ -479,136 +481,90 @@ std::string PlatformAndroid::GetRunAs() { return run_as.str(); } -// Helper function to populate process status information from -// /proc/[pid]/status -void PlatformAndroid::PopulateProcessStatusInfo( - lldb::pid_t pid, ProcessInstanceInfo &process_info) { - // Read /proc/[pid]/status to get parent PID, UIDs, and GIDs - Status error; - AdbClientUP status_adb = GetAdbClient(error); - if (error.Fail()) - return; - - std::string status_output; - StreamString status_cmd; - status_cmd.Printf( - "cat /proc/%llu/status 2>/dev/null | grep -E '^(PPid|Uid|Gid):'", - static_cast(pid)); - Status status_error = - status_adb->Shell(status_cmd.GetData(), seconds(5), &status_output); +static bool NeedsCmdlineSupplement(const ProcessInstanceInfo &proc_info) { + llvm::StringRef name = + proc_info.GetExecutableFile().GetFilename().GetStringRef(); + return name.contains("app_process") || name.contains("zygote"); +} - if (status_error.Fail() || status_output.empty()) +// Fetch /proc/PID/cmdline for processes to get actual package names. +// Android apps often show as "zygote" or "app_process" without this. 
+static void SupplementWithCmdlineInfo(ProcessInstanceInfoList &proc_infos, + AdbClient *adb) { + if (proc_infos.empty()) return; - llvm::SmallVector lines; - llvm::StringRef(status_output).split(lines, '\n'); - - for (llvm::StringRef line : lines) { - line = line.trim(); - if (line.starts_with("PPid:")) { - llvm::StringRef ppid_str = line.substr(5).trim(); - lldb::pid_t ppid; - if (llvm::to_integer(ppid_str, ppid)) - process_info.SetParentProcessID(ppid); - } else if (line.starts_with("Uid:")) { - llvm::SmallVector uid_parts; - line.substr(4).trim().split(uid_parts, '\t', -1, false); - if (uid_parts.size() >= 2) { - uint32_t uid, euid; - if (llvm::to_integer(uid_parts[0].trim(), uid)) - process_info.SetUserID(uid); - if (llvm::to_integer(uid_parts[1].trim(), euid)) - process_info.SetEffectiveUserID(euid); - } - } else if (line.starts_with("Gid:")) { - llvm::SmallVector gid_parts; - line.substr(4).trim().split(gid_parts, '\t', -1, false); - if (gid_parts.size() >= 2) { - uint32_t gid, egid; - if (llvm::to_integer(gid_parts[0].trim(), gid)) - process_info.SetGroupID(gid); - if (llvm::to_integer(gid_parts[1].trim(), egid)) - process_info.SetEffectiveGroupID(egid); - } + llvm::DenseMap pid_map; + std::string pid_list; + for (auto &proc_info : proc_infos) { + if (NeedsCmdlineSupplement(proc_info)) { + lldb::pid_t pid = proc_info.GetProcessID(); + pid_map[pid] = &proc_info; + if (!pid_list.empty()) + pid_list += " "; + pid_list += std::to_string(pid); } } -} -// Helper function to populate command line arguments from /proc/[pid]/cmdline -void PlatformAndroid::PopulateProcessCommandLine( - lldb::pid_t pid, ProcessInstanceInfo &process_info) { - // Read /proc/[pid]/cmdline to get command line arguments - Status error; - AdbClientUP cmdline_adb = GetAdbClient(error); - if (error.Fail()) + if (pid_list.empty()) return; + Log *log = GetLog(LLDBLog::Platform); + + // Use xargs -P to parallelize cmdline fetching (up to 8 concurrent reads) + StreamString cmd; + cmd.Printf( + 
"echo '%s' | xargs -n 1 -P 8 sh -c " + "'echo \"$1:$(cat /proc/$1/cmdline 2>/dev/null | tr \"\\0\" \" \")\"' sh", + pid_list.c_str()); + std::string cmdline_output; - StreamString cmdline_cmd; - cmdline_cmd.Printf("cat /proc/%llu/cmdline 2>/dev/null | tr '\\000' ' '", - static_cast(pid)); - Status cmdline_error = - cmdline_adb->Shell(cmdline_cmd.GetData(), seconds(5), &cmdline_output); + Status error = adb->Shell(cmd.GetData(), seconds(5), &cmdline_output); - if (cmdline_error.Fail() || cmdline_output.empty()) + if (error.Fail() || cmdline_output.empty()) return; - cmdline_output = llvm::StringRef(cmdline_output).trim().str(); - if (cmdline_output.empty()) - return; + llvm::SmallVector lines; + llvm::StringRef(cmdline_output).split(lines, '\n', -1, false); - llvm::SmallVector args; - llvm::StringRef(cmdline_output).split(args, ' ', -1, false); - if (args.empty()) - return; + for (llvm::StringRef line : lines) { + line = line.trim(); + auto [pid_str, cmdline] = line.split(':'); + if (pid_str.empty() || cmdline.empty()) + continue; - process_info.SetArg0(args[0]); - Args process_args; - for (size_t i = 1; i < args.size(); i++) { - if (!args[i].empty()) - process_args.AppendArgument(args[i]); - } - process_info.SetArguments(process_args, false); -} + cmdline = cmdline.trim(); -// Helper function to populate architecture from /proc/[pid]/exe -void PlatformAndroid::PopulateProcessArchitecture( - lldb::pid_t pid, ProcessInstanceInfo &process_info) { - // Read /proc/[pid]/exe to get executable path for architecture detection - Status error; - AdbClientUP exe_adb = GetAdbClient(error); - if (error.Fail()) - return; + lldb::pid_t pid; + if (!llvm::to_integer(pid_str, pid) || cmdline.empty()) + continue; - std::string exe_output; - StreamString exe_cmd; - exe_cmd.Printf("readlink /proc/%llu/exe 2>/dev/null", - static_cast(pid)); - Status exe_error = exe_adb->Shell(exe_cmd.GetData(), seconds(5), &exe_output); + auto it = pid_map.find(pid); + if (it == pid_map.end()) + 
continue; - if (exe_error.Fail() || exe_output.empty()) - return; + ProcessInstanceInfo *proc_info = it->second; + llvm::SmallVector args; + cmdline.split(args, ' ', -1, false); - exe_output = llvm::StringRef(exe_output).trim().str(); - - // Determine architecture from exe path - ArchSpec arch; - if (exe_output.find("64") != std::string::npos || - exe_output.find("arm64") != std::string::npos || - exe_output.find("aarch64") != std::string::npos) { - arch.SetTriple("aarch64-unknown-linux-android"); - } else if (exe_output.find("x86_64") != std::string::npos) { - arch.SetTriple("x86_64-unknown-linux-android"); - } else if (exe_output.find("x86") != std::string::npos || - exe_output.find("i686") != std::string::npos) { - arch.SetTriple("i686-unknown-linux-android"); - } else { - // Default to armv7 for 32-bit ARM (most common on Android) - arch.SetTriple("armv7-unknown-linux-android"); - } + if (!args.empty()) { + proc_info->GetExecutableFile().SetFile(args[0], FileSpec::Style::posix); + + if (args.size() > 1) { + Args process_args; + for (size_t i = 1; i < args.size(); ++i) { + if (!args[i].empty()) + process_args.AppendArgument(args[i]); + } + proc_info->SetArguments(process_args, false); + } - if (arch.IsValid()) - process_info.SetArchitecture(arch); + LLDB_LOGF(log, + "PlatformAndroid::%s supplemented PID %llu with cmdline: %s", + __FUNCTION__, static_cast(pid), + cmdline.str().c_str()); + } + } } uint32_t @@ -616,109 +572,39 @@ PlatformAndroid::FindProcesses(const ProcessInstanceInfoMatch &match_info, ProcessInstanceInfoList &proc_infos) { proc_infos.clear(); - // When LLDB is running natively on an Android device (IsHost() == true), - // use the parent class's standard Linux /proc enumeration. IsHost() is only - // true when compiled for Android (#if defined(__ANDROID__)), so calling - // PlatformLinux methods is safe (Android is Linux-based). 
if (IsHost()) return PlatformLinux::FindProcesses(match_info, proc_infos); - // Remote Android platform: implement process name lookup using 'pidof' over - // adb. - - // LLDB stores the search name in GetExecutableFile() (even though it's - // actually a process name like "com.android.chrome" rather than an - // executable path). If no search name is provided, we can't use - // 'pidof', so return early with no results. - const ProcessInstanceInfo &match_process_info = match_info.GetProcessInfo(); - if (!match_process_info.GetExecutableFile() || - match_info.GetNameMatchType() == NameMatch::Ignore) { - return 0; - } - - // Extract the process name to search for (typically an Android package name - // like "com.example.app" or binary name like "app_process64") - std::string process_name = match_process_info.GetExecutableFile().GetPath(); - if (process_name.empty()) - return 0; - - // Use adb to find the process by name - Status error; - AdbClientUP adb(GetAdbClient(error)); - if (error.Fail()) { - Log *log = GetLog(LLDBLog::Platform); - LLDB_LOGF(log, "PlatformAndroid::%s failed to get ADB client: %s", - __FUNCTION__, error.AsCString()); - return 0; - } - - // Use 'pidof' command to get PIDs for the process name. - // Quote the process name to handle special characters (spaces, etc.) - std::string pidof_output; - StreamString command; - command.Printf("pidof '%s'", process_name.c_str()); - error = adb->Shell(command.GetData(), seconds(5), &pidof_output); - - if (error.Fail()) { - Log *log = GetLog(LLDBLog::Platform); - LLDB_LOG(log, "PlatformAndroid::{} 'pidof {}' failed: {}", __FUNCTION__, - process_name.c_str(), error.AsCString()); - return 0; - } - - // Parse PIDs from pidof output. - // Note: pidof can return multiple PIDs (space-separated) if multiple - // instances of the same executable are running. 
- pidof_output = llvm::StringRef(pidof_output).trim().str(); - if (pidof_output.empty()) { - Log *log = GetLog(LLDBLog::Platform); - LLDB_LOGF(log, "PlatformAndroid::%s no process found with name '%s'", - __FUNCTION__, process_name.c_str()); + if (!m_remote_platform_sp) return 0; - } - - // Split the output by whitespace to handle multiple PIDs - llvm::SmallVector pid_strings; - llvm::StringRef(pidof_output).split(pid_strings, ' ', -1, false); - - Log *log = GetLog(LLDBLog::Platform); - - // Process each PID and gather information - uint32_t num_matches = 0; - for (llvm::StringRef pid_str : pid_strings) { - pid_str = pid_str.trim(); - if (pid_str.empty()) - continue; - - lldb::pid_t pid; - if (!llvm::to_integer(pid_str, pid)) { - LLDB_LOGF(log, "PlatformAndroid::%s failed to parse PID from: '%s'", - __FUNCTION__, pid_str.str().c_str()); - continue; - } - - ProcessInstanceInfo process_info; - process_info.SetProcessID(pid); - process_info.GetExecutableFile().SetFile(process_name, - FileSpec::Style::posix); - - // Populate additional process information - PopulateProcessStatusInfo(pid, process_info); - PopulateProcessCommandLine(pid, process_info); - PopulateProcessArchitecture(pid, process_info); - - // Check if this process matches the criteria - if (match_info.Matches(process_info)) { - proc_infos.push_back(process_info); - num_matches++; - LLDB_LOGF(log, "PlatformAndroid::%s found process '%s' with PID %llu", - __FUNCTION__, process_name.c_str(), - static_cast(pid)); + // Android-specific process name handling: + // Apps spawned from zygote initially appear as "app_process" or "zygote" + // in the process list, but their actual package names (e.g., + // "com.example.app") are only available in /proc/PID/cmdline. To support + // name-based matching, we must first fetch cmdline info for all processes, + // then apply the original name filter. 
+ ProcessInstanceInfoMatch broad_match_info = match_info; + broad_match_info.SetNameMatchType(NameMatch::Ignore); + + ProcessInstanceInfoList all_procs; + uint32_t count = + m_remote_platform_sp->FindProcesses(broad_match_info, all_procs); + + if (count > 0) { + Status error; + AdbClientUP adb(GetAdbClient(error)); + if (error.Success()) + SupplementWithCmdlineInfo(all_procs, adb.get()); + + // Apply the original name matching against supplemented process info. + for (auto &proc_info : all_procs) { + if (match_info.Matches(proc_info)) + proc_infos.push_back(proc_info); } } - return num_matches; + return proc_infos.size(); } std::unique_ptr PlatformAndroid::GetSyncService(Status &error) { diff --git a/lldb/source/Plugins/Platform/Android/PlatformAndroid.h b/lldb/source/Plugins/Platform/Android/PlatformAndroid.h index e771c6ae97d4d..c6a412b39d410 100644 --- a/lldb/source/Plugins/Platform/Android/PlatformAndroid.h +++ b/lldb/source/Plugins/Platform/Android/PlatformAndroid.h @@ -60,7 +60,7 @@ class PlatformAndroid : public platform_linux::PlatformLinux { uint32_t GetDefaultMemoryCacheLineSize() override; uint32_t FindProcesses(const ProcessInstanceInfoMatch &match_info, - ProcessInstanceInfoList &proc_infos) override; + ProcessInstanceInfoList &process_infos) override; protected: const char *GetCacheHostname() override; @@ -86,17 +86,8 @@ class PlatformAndroid : public platform_linux::PlatformLinux { protected: virtual std::unique_ptr GetSyncService(Status &error); -private: std::string m_device_id; uint32_t m_sdk_version; - - // Helper functions for process information gathering - void PopulateProcessStatusInfo(lldb::pid_t pid, - ProcessInstanceInfo &process_info); - void PopulateProcessCommandLine(lldb::pid_t pid, - ProcessInstanceInfo &process_info); - void PopulateProcessArchitecture(lldb::pid_t pid, - ProcessInstanceInfo &process_info); }; } // namespace platform_android diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp 
b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 8b4a3e0a7c3fb..bfbd85ea34203 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -56,7 +56,7 @@ using namespace lldb; using namespace lldb_private; #define OPTTABLE_STR_TABLE_CODE -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTTABLE_STR_TABLE_CODE static Status ExceptionMaskValidator(const char *string, void *unused) { @@ -1124,7 +1124,7 @@ void PlatformDarwin::AddClangModuleCompilationOptionsForSDKType( #define OPTION(PREFIX_OFFSET, NAME_OFFSET, VAR, ...) \ llvm::StringRef opt_##VAR = OptionStrTable[NAME_OFFSET]; \ (void)opt_##VAR; -#include "clang/Driver/Options.inc" +#include "clang/Options/Options.inc" #undef OPTION minimum_version_option << '-'; switch (sdk_type) { diff --git a/llvm/benchmarks/SpecialCaseListBM.cpp b/llvm/benchmarks/SpecialCaseListBM.cpp index b5d82682199db..7cf21431efecd 100644 --- a/llvm/benchmarks/SpecialCaseListBM.cpp +++ b/llvm/benchmarks/SpecialCaseListBM.cpp @@ -5,7 +5,6 @@ #include "llvm/Support/SpecialCaseList.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include #include diff --git a/llvm/include/llvm/ADT/BitmaskEnum.h b/llvm/include/llvm/ADT/BitmaskEnum.h index 9555fadda6e47..c10a38c8ce4cb 100644 --- a/llvm/include/llvm/ADT/BitmaskEnum.h +++ b/llvm/include/llvm/ADT/BitmaskEnum.h @@ -11,7 +11,6 @@ #include #include -#include #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/bit.h" diff --git a/llvm/include/llvm/ADT/ConcurrentHashtable.h b/llvm/include/llvm/ADT/ConcurrentHashtable.h index 0cc03cf7a692a..9ee5f594ea56a 100644 --- a/llvm/include/llvm/ADT/ConcurrentHashtable.h +++ b/llvm/include/llvm/ADT/ConcurrentHashtable.h @@ -24,7 +24,6 @@ #include #include #include -#include namespace llvm { diff --git a/llvm/include/llvm/ADT/PointerSumType.h b/llvm/include/llvm/ADT/PointerSumType.h index c4971bf3af87a..c8e6cffd796a6 100644 
--- a/llvm/include/llvm/ADT/PointerSumType.h +++ b/llvm/include/llvm/ADT/PointerSumType.h @@ -15,7 +15,6 @@ #include #include #include -#include namespace llvm { diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h index c129f3a695b9e..0fde14126c79b 100644 --- a/llvm/include/llvm/ADT/SetVector.h +++ b/llvm/include/llvm/ADT/SetVector.h @@ -28,7 +28,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include -#include namespace llvm { diff --git a/llvm/include/llvm/ADT/TinyPtrVector.h b/llvm/include/llvm/ADT/TinyPtrVector.h index 8d7a07b5e9eb5..ed08ec8a966c7 100644 --- a/llvm/include/llvm/ADT/TinyPtrVector.h +++ b/llvm/include/llvm/ADT/TinyPtrVector.h @@ -15,7 +15,6 @@ #include #include #include -#include namespace llvm { diff --git a/llvm/include/llvm/Analysis/DominanceFrontier.h b/llvm/include/llvm/Analysis/DominanceFrontier.h index 68ddcf753b59f..787793501f98a 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -24,7 +24,6 @@ #include "llvm/Pass.h" #include "llvm/Support/GenericDomTree.h" #include -#include namespace llvm { diff --git a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h index 871dd95c365e8..1483588581f4e 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h +++ b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h @@ -24,7 +24,6 @@ #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 98565f423df3e..4d5d1fc7dfadc 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include diff --git a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h 
b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h index 5e0779157473e..8fde15d342a15 100644 --- a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h +++ b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h @@ -12,7 +12,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include -#include namespace llvm { namespace dwarf_linker { diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h index b769e53d80270..7a1008689296d 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h @@ -15,7 +15,6 @@ #include "llvm/Support/Compiler.h" #include -#include #include "llvm/ADT/STLForwardCompat.h" #include "llvm/Support/Endian.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h index 76a019ddf8f34..17b5bfac9ac31 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h @@ -19,7 +19,6 @@ #include "llvm/Support/FormatVariadic.h" #include -#include namespace llvm { namespace pdb { diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_systemz.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_systemz.h new file mode 100644 index 0000000000000..a996dfd9543df --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_systemz.h @@ -0,0 +1,39 @@ +//===--- ELF_systemz.h - JIT link functions for ELF/systemz --*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for ELF/systemz. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_SYSTEMZ_H +#define LLVM_EXECUTIONENGINE_JITLINK_ELF_SYSTEMZ_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from an ELF/systemz relocatable object +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected> createLinkGraphFromELFObject_systemz( + MemoryBufferRef ObjectBuffer, std::shared_ptr SSP); + +/// jit-link the given object buffer, which must be a ELF systemz relocatable +/// object file. +void link_ELF_systemz(std::unique_ptr G, + std::unique_ptr Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_SYSTEMZ_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/systemz.h b/llvm/include/llvm/ExecutionEngine/JITLink/systemz.h new file mode 100644 index 0000000000000..dde3448cd5da7 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/systemz.h @@ -0,0 +1,924 @@ +//=== systemz.h - Generic JITLink systemz edge kinds, utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing systemz objects. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_SYSTEMZ_H +#define LLVM_EXECUTIONENGINE_JITLINK_SYSTEMZ_H + +#include "TableManager.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +using namespace llvm::support::endian; + +namespace llvm { +namespace jitlink { +namespace systemz { + +/// Represents systemz fixups and other systemz-specific edge kinds. +enum EdgeKind_systemz : Edge::Kind { + + /// A plain 64-bit pointer value relocation. + /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint64 + /// + Pointer64 = Edge::FirstRelocation, + + /// A plain 32-bit pointer value relocation. + /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint32 + /// + /// Errors: + /// - The target must reside in the low 32-bits of the address space, + /// otherwise an out-of-range error will be returned. + /// + Pointer32, + + /// A plain 20-bit pointer value relocation. + /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint20 + /// + /// Errors: + /// - The target must reside in the low 20-bits of the address space, + /// otherwise an out-of-range error will be returned. + /// + Pointer20, + + /// A plain 16-bit pointer value relocation. + /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint16 + /// + /// Errors: + /// - The target must reside in the low 16-bits of the address space, + /// otherwise an out-of-range error will be returned. + /// + Pointer16, + + /// A plain 12-bit pointer value relocation. + /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint12 + /// + /// Errors: + /// - The target must reside in the low 12-bits of the address space, + /// otherwise an out-of-range error will be returned. + /// + Pointer12, + + /// A plain 8-bit pointer value relocation. 
+ /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint8 + /// + /// Errors: + /// - The target must reside in the low 8-bits of the address space, + /// otherwise an out-of-range error will be returned. + /// + Pointer8, + + /// A 64-bit delta. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend : int64 + /// + Delta64, + + /// A 32-bit delta. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + Delta32, + + /// A 16-bit delta. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend : int16 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int16, otherwise + /// an out-of-range error will be returned. + /// + Delta16, + + /// A 32-bit delta shifted by 1. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int32 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int33, otherwise an out-of-range error will be returned. + /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + Delta32dbl, + + /// A 24-bit delta shifted by 1. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int24 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int25, otherwise an out-of-range error will be returned. 
+ /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + Delta24dbl, + + /// A 16-bit delta shifted by 1. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int16 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int17, otherwise an out-of-range error will be returned. + /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + Delta16dbl, + + /// A 12-bit delta shifted by 1. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int12 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int13, otherwise an out-of-range error will be returned. + /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + Delta12dbl, + + /// A 64-bit negative delta. + /// + /// Delta from target back to the fixup. + /// + /// Fixup expression: + /// Fixup <- Fixup - Target + Addend : int64 + /// + NegDelta64, + + /// A 32-bit negative delta. + /// + /// Delta from the target back to the fixup. + /// + /// Fixup expression: + /// Fixup <- Fixup - Target + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + NegDelta32, + + /// A 32-bit Delta shifted by 1. + /// + /// Delta from the fixup to the PLT slot for the target. This will lead to + /// creation of a PLT stub. 
+ /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int32 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int33, otherwise an out-of-range error will be returned. + /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + DeltaPLT32dbl, + + /// A 24-bit Delta shifted by 1. + /// + /// Delta from the fixup to the PLT slot for the target. This will lead to + /// creation of a PLT stub. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int24 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int25, otherwise an out-of-range error will be returned. + /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + DeltaPLT24dbl, + + /// A 16-bit Delta shifted by 1. + /// + /// Delta from the fixup to the PLT slot for the target. This will lead to + /// creation of a PLT stub. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int16 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int17, otherwise an out-of-range error will be returned. + /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + DeltaPLT16dbl, + + /// A 12-bit Delta shifted by 1. + /// + /// Delta from the fixup to the PLT slot for the target. This will lead to + /// creation of a PLT stub. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 1 : int12 + /// + /// Errors: + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int13, otherwise an out-of-range error will be returned. 
+ /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + DeltaPLT12dbl, + + /// A 64-bit Delta. + /// + /// Delta from the fixup to the PLT slot for the target. This will lead to + /// creation of a PLT stub. + /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend : int64 + /// + DeltaPLT64, + + /// A 32-bit Delta. + /// + /// Delta from the fixup to the PLT slot for the target. This will lead to + /// creation of a PLT stub. + /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + DeltaPLT32, + + /// A 64-bit offset from GOT to PLT. + /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int64 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// + Delta64PLTFromGOT, + + /// A 32-bit offset from GOT to PLT. + /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int32 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + Delta32PLTFromGOT, + + /// A 16-bit offset from GOT to PLT. + /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int16 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression must fit into an int16, otherwise + /// an out-of-range error will be returned. + /// + Delta16PLTFromGOT, + + /// A 64-bit offset from GOT.
+ /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int64 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// + Delta64FromGOT, + + /// A 32-bit offset from GOT. + /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int32 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + Delta32FromGOT, + + /// A 16-bit offset from GOT. + /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int16 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression must fit into an int16, otherwise + /// an out-of-range error will be returned. + /// + Delta16FromGOT, + + /// A 20-bit offset from GOT. + /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int20 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression must fit into an int20, otherwise + /// an out-of-range error will be returned. + /// + Delta20FromGOT, + + /// A 12-bit offset from GOT. + /// + /// Fixup expression: + /// Fixup <- Target - GOTBase + Addend : int12 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression must fit into an int12, otherwise + /// an out-of-range error will be returned. + /// + Delta12FromGOT, + + /// A GOT entry getter/constructor, transformed to Delta64FromGOT pointing + /// at the GOT entry for the original target.
+ /// + /// Indicates that this edge should be transformed into a Delta64FromGOT + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does + /// not already exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta64FromGOT, + + /// A GOT entry getter/constructor, transformed to Delta32FromGOT pointing + /// at the GOT entry for the original target. + /// + /// Indicates that this edge should be transformed into a Delta32FromGOT + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does + /// not already exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta32FromGOT, + + /// A GOT entry getter/constructor, transformed to Delta20FromGOT pointing + /// at the GOT entry for the original target. + /// + /// Indicates that this edge should be transformed into a Delta20FromGOT + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does + /// not already exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default.
+ /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta20FromGOT, + + /// A GOT entry getter/constructor, transformed to Delta16FromGOT pointing + /// at the GOT entry for the original target. + /// + /// Indicates that this edge should be transformed into a Delta16FromGOT + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does + /// not already exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta16FromGOT, + + /// A GOT entry getter/constructor, transformed to Delta12FromGOT pointing + /// at the GOT entry for the original target. + /// + /// Indicates that this edge should be transformed into a Delta12FromGOT + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does + /// not already exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta12FromGOT, + + /// A GOT entry getter/constructor, transformed to Delta32dbl pointing at + /// the GOT entry for the original target. + /// + /// Indicates that this edge should be transformed into a Delta32dbl targeting + /// the GOT entry for the edge's current target, maintaining the same addend. + /// A GOT entry for the target should be created if one does not already + /// exist.
+ /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta32dbl, + + /// A 32-bit Delta to GOT base. + /// + /// Fixup expression: + /// Fixup <- GOTBase - Fixup + Addend : int32 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + Delta32GOTBase, + + /// A 32-bit Delta to GOT base shifted by 1. + /// + /// Fixup expression: + /// Fixup <- (GOTBase - Fixup + Addend) >> 1 : int32 + /// + /// Errors: + /// - *ASSERTION* Failure on a null pointer GOTSymbol, i.e. the GOT section + /// symbol has not been defined. + /// - The result of the fixup expression before shifting right by 1 must + /// fit into an int33, otherwise an out-of-range error will be returned. + /// - The result of the fixup expression before shifting right by 1 must + /// be multiple of 2, otherwise an alignment error will be returned. + /// + Delta32dblGOTBase, + +}; + +/// Returns a string name for the given systemz edge. For debugging purposes +/// only +const char *getEdgeKindName(Edge::Kind K); + +/// Apply fixup expression for edge to block content. +inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, + const Symbol *GOTSymbol) { + using namespace support; + + char *BlockWorkingMem = B.getAlreadyMutableContent().data(); + char *FixupPtr = BlockWorkingMem + E.getOffset(); + orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset(); + int64_t S = E.getTarget().getAddress().getValue(); + int64_t A = E.getAddend(); + int64_t P = FixupAddress.getValue(); + int64_t GOTBase = GOTSymbol ? 
GOTSymbol->getAddress().getValue() : 0; + Edge::Kind K = E.getKind(); + + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Applying fixup on " << G.getEdgeKindName(K) + << " edge, (S, A, P, .GOT.) = (" << formatv("{0:x}", S) << ", " + << formatv("{0:x}", A) << ", " << formatv("{0:x}", P) << ", " + << formatv("{0:x}", GOTBase) << ")\n"; + }); + + const auto isAlignmentCorrect = [](uint64_t Value, int N) { + return (Value & (N - 1)) ? false : true; + }; + + switch (K) { + case Pointer64: { + uint64_t Value = S + A; + write64be(FixupPtr, Value); + break; + } + case Pointer32: { + uint64_t Value = S + A; + if (!LLVM_UNLIKELY(isUInt<32>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write32be(FixupPtr, Value); + break; + } + case Pointer20: { + uint64_t Value = S + A; + if (!LLVM_UNLIKELY(isInt<20>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write32be(FixupPtr, (read32be(FixupPtr) & 0xF00000FF) | + ((Value & 0xFFF) << 16) | ((Value & 0xFF000) >> 4)); + break; + } + case Pointer16: { + uint64_t Value = S + A; + if (!LLVM_UNLIKELY(isUInt<16>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write16be(FixupPtr, Value); + break; + } + case Pointer12: { + uint64_t Value = S + A; + if (!LLVM_UNLIKELY(isUInt<12>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write16be(FixupPtr, (read16be(FixupPtr) & 0xF000) | Value); + break; + } + case Pointer8: { + uint64_t Value = S + A; + if (!LLVM_UNLIKELY(isUInt<8>(Value))) + return makeTargetOutOfRangeError(G, B, E); + *(uint8_t *)FixupPtr = Value; + break; + } + case Delta64: + case DeltaPLT64: { + int64_t Value = S + A - P; + write64be(FixupPtr, Value); + break; + } + case Delta32: + case DeltaPLT32: { + int64_t Value = S + A - P; + if (!LLVM_UNLIKELY(isInt<32>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write32be(FixupPtr, Value); + break; + } + case Delta16: { + int64_t Value = S + A - P; + if (!LLVM_UNLIKELY(isInt<16>(Value))) + return makeTargetOutOfRangeError(G, B, E); + 
write16be(FixupPtr, Value); + break; + } + case NegDelta32: { + int64_t Value = P + A - S; + if (!LLVM_UNLIKELY(isInt<32>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write32be(FixupPtr, Value); + break; + } + case Delta32dbl: + case DeltaPLT32dbl: { + int64_t Value = S + A - P; + if (!LLVM_UNLIKELY(isInt<33>(Value))) + return makeTargetOutOfRangeError(G, B, E); + if (!LLVM_UNLIKELY(isAlignmentCorrect(Value, 2))) + return makeAlignmentError(FixupAddress, Value, 2, E); + write32be(FixupPtr, Value >> 1); + break; + } + case Delta24dbl: + case DeltaPLT24dbl: { + int64_t Value = S + A - P; + if (!LLVM_UNLIKELY(isInt<25>(Value))) + return makeTargetOutOfRangeError(G, B, E); + if (!LLVM_UNLIKELY(isAlignmentCorrect(Value, 2))) + return makeAlignmentError(FixupAddress, Value, 2, E); + FixupPtr[0] = Value >> 17; + FixupPtr[1] = Value >> 9; + FixupPtr[2] = Value >> 1; + break; + } + case Delta16dbl: + case DeltaPLT16dbl: { + int64_t Value = S + A - P; + if (!LLVM_UNLIKELY(isInt<17>(Value))) + return makeTargetOutOfRangeError(G, B, E); + if (!LLVM_UNLIKELY(isAlignmentCorrect(Value, 2))) + return makeAlignmentError(FixupAddress, Value, 2, E); + write16be(FixupPtr, Value >> 1); + break; + } + case Delta12dbl: + case DeltaPLT12dbl: { + int64_t Value = S + A - P; + if (!LLVM_UNLIKELY(isInt<13>(Value))) + return makeTargetOutOfRangeError(G, B, E); + if (!LLVM_UNLIKELY(isAlignmentCorrect(Value, 2))) + return makeAlignmentError(FixupAddress, Value, 2, E); + write16be(FixupPtr, + (read16be(FixupPtr) & 0xF000) | ((Value >> 1) & 0x0FFF)); + break; + } + case Delta32GOTBase: { + assert(GOTSymbol && "No GOT section symbol"); + int64_t Value = GOTBase + A - P; + if (!LLVM_UNLIKELY(isInt<32>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write32be(FixupPtr, Value); + break; + } + case Delta32dblGOTBase: { + assert(GOTSymbol && "No GOT section symbol"); + int64_t Value = GOTBase + A - P; + if (!LLVM_UNLIKELY(isInt<33>(Value))) + return makeTargetOutOfRangeError(G, B, E); 
+ if (!LLVM_UNLIKELY(isAlignmentCorrect(Value, 2))) + return makeAlignmentError(FixupAddress, Value, 2, E); + write32be(FixupPtr, Value >> 1); + break; + } + case Delta64PLTFromGOT: + case Delta64FromGOT: { + assert(GOTSymbol && "No GOT section symbol"); + int64_t Value = S + A - GOTBase; + write64be(FixupPtr, Value); + break; + } + case Delta32PLTFromGOT: + case Delta32FromGOT: { + assert(GOTSymbol && "No GOT section symbol"); + int64_t Value = S + A - GOTBase; + if (!LLVM_UNLIKELY(isInt<32>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write32be(FixupPtr, Value); + break; + } + case Delta16PLTFromGOT: + case Delta16FromGOT: { + assert(GOTSymbol && "No GOT section symbol"); + int64_t Value = S + A - GOTBase; + if (!LLVM_UNLIKELY(isInt<16>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write16be(FixupPtr, Value); + break; + } + case Delta20FromGOT: { + assert(GOTSymbol && "No GOT section symbol"); + uint64_t Value = S - GOTBase + A; + if (!LLVM_UNLIKELY(isInt<20>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write32be(FixupPtr, (read32be(FixupPtr) & 0xF00000FF) | + ((Value & 0xFFF) << 16) | ((Value & 0xFF000) >> 4)); + break; + } + case Delta12FromGOT: { + assert(GOTSymbol && "No GOT section symbol"); + uint64_t Value = S - GOTBase + A; + if (!LLVM_UNLIKELY(isUInt<12>(Value))) + return makeTargetOutOfRangeError(G, B, E); + write16be(FixupPtr, (read16be(FixupPtr) & 0xF000) | Value); + break; + } + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " unsupported edge kind " + getEdgeKindName(E.getKind())); + } + + return Error::success(); +} + +/// SystemZ null pointer content. +extern const char NullPointerContent[8]; +inline ArrayRef getGOTEntryBlockContent(LinkGraph &G) { + return {reinterpret_cast(NullPointerContent), + G.getPointerSize()}; +} + +/// SystemZ pointer jump stub content. 
+/// +/// Contains the instruction sequence for an indirect jump via an in-memory +/// pointer: +/// lgrl %r1, ptr +/// j %r1 +constexpr size_t StubEntrySize = 8; +extern const char Pointer64JumpStubContent[StubEntrySize]; +inline ArrayRef getStubBlockContent(LinkGraph &G) { + auto StubContent = Pointer64JumpStubContent; + return {reinterpret_cast(StubContent), StubEntrySize}; +} + +/// Creates a new pointer block in the given section and returns an +/// anonymous symbol pointing to it. +/// +/// If InitialTarget is given then a Pointer64 relocation will be added to the +/// block pointing at InitialTarget. +inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, + Symbol *InitialTarget = nullptr, + uint64_t InitialAddend = 0) { + auto &B = G.createContentBlock(PointerSection, getGOTEntryBlockContent(G), + orc::ExecutorAddr(), G.getPointerSize(), 0); + if (InitialTarget) + B.addEdge(Pointer64, 0, *InitialTarget, InitialAddend); + return G.addAnonymousSymbol(B, 0, G.getPointerSize(), false, false); +} + +/// Create a jump stub block that jumps via the pointer at the given symbol. +/// +/// The stub block will have the following default values: +/// alignment: 16-bit +/// alignment-offset: 0 +inline Block &createPointerJumpStubBlock(LinkGraph &G, Section &StubSection, + Symbol &PointerSymbol) { + auto &B = G.createContentBlock(StubSection, getStubBlockContent(G), + orc::ExecutorAddr(), 16, 0); + B.addEdge(Delta32dbl, 2, PointerSymbol, 2); + return B; +} + +/// Create a jump stub that jumps via the pointer at the given symbol and +/// an anonymous symbol pointing to it. Return the anonymous symbol. +/// +/// The stub block will be created by createPointerJumpStubBlock. +inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G, + Section &StubSection, + Symbol &PointerSymbol) { + return G.addAnonymousSymbol( + createPointerJumpStubBlock(G, StubSection, PointerSymbol), 0, + StubEntrySize, true, false); +} + +/// Global Offset Table Builder.
+class GOTTableManager : public TableManager { +public: + static StringRef getSectionName() { return "$__GOT"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getTarget().isDefined()) + return false; + Edge::Kind KindToSet = Edge::Invalid; + switch (E.getKind()) { + case systemz::RequestGOTAndTransformToDelta12FromGOT: + KindToSet = systemz::Delta12FromGOT; + break; + case systemz::RequestGOTAndTransformToDelta16FromGOT: + KindToSet = systemz::Delta16FromGOT; + break; + case systemz::RequestGOTAndTransformToDelta20FromGOT: + KindToSet = systemz::Delta20FromGOT; + break; + case systemz::RequestGOTAndTransformToDelta32FromGOT: + KindToSet = systemz::Delta32FromGOT; + break; + case systemz::RequestGOTAndTransformToDelta64FromGOT: + KindToSet = systemz::Delta64FromGOT; + break; + case systemz::RequestGOTAndTransformToDelta32dbl: + KindToSet = systemz::DeltaPLT32dbl; + break; + default: + return false; + } + assert(KindToSet != Edge::Invalid && + "Fell through switch, but no new kind to set"); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setKind(KindToSet); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + return createAnonymousPointer(G, getGOTSection(G), &Target); + } + +private: + Section &getGOTSection(LinkGraph &G) { + if (!GOTSection) + GOTSection = &G.createSection(getSectionName(), + orc::MemProt::Read | orc::MemProt::Exec); + return *GOTSection; + } + + Section *GOTSection = nullptr; +}; + +/// Procedure Linkage Table Builder. 
+class PLTTableManager : public TableManager { +public: + PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {} + + static StringRef getSectionName() { return "$__STUBS"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getTarget().isDefined()) + return false; + + switch (E.getKind()) { + case systemz::DeltaPLT32: + case systemz::DeltaPLT64: + case systemz::DeltaPLT12dbl: + case systemz::DeltaPLT16dbl: + case systemz::DeltaPLT24dbl: + case systemz::DeltaPLT32dbl: + case systemz::Delta16PLTFromGOT: + case systemz::Delta32PLTFromGOT: + case systemz::Delta64PLTFromGOT: + break; + default: + return false; + } + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + return createAnonymousPointerJumpStub(G, getStubsSection(G), + GOT.getEntryForTarget(G, Target)); + } + +public: + Section &getStubsSection(LinkGraph &G) { + if (!StubsSection) + StubsSection = &G.createSection(getSectionName(), + orc::MemProt::Read | orc::MemProt::Exec); + return *StubsSection; + } + + GOTTableManager &GOT; + Section *StubsSection = nullptr; +}; + +} // namespace systemz +} // namespace jitlink +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_SYSTEMZ_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index fc01afc6d8739..01e9cf914cb54 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -43,7 +43,6 @@ #include #include #include -#include #include #include diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h index 
517089341978a..81c6a0b01530a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SymbolFilter.h @@ -12,7 +12,6 @@ #include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index ef0fed4f41556..e6058612de4b7 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -20,7 +20,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 12dfb6c607bb9..67ebafc89cf99 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -241,7 +241,7 @@ ENUM(MotionExpectation, Present); // V5.2: [15.9.1] `task-dependence-type` modifier ENUM(DependenceType, Depobj, In, Inout, Inoutset, Mutexinoutset, Out, Sink, Source); -ENUM(Prescriptiveness, Strict, Fallback); +ENUM(Prescriptiveness, Strict); template // struct LoopIterationT { @@ -591,10 +591,10 @@ struct DynamicAllocatorsT { template // struct DynGroupprivateT { ENUM(AccessGroup, Cgroup); - using Prescriptiveness = type::Prescriptiveness; + ENUM(Fallback, Abort, Default_Mem, Null); using Size = E; using TupleTrait = std::true_type; - std::tuple t; + std::tuple t; }; // V5.2: [5.8.4] `enter` clause diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 6d6eb5cda52de..d702273cec9ec 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -207,40 +207,40 @@ struct ConstructDecompositionT { template bool applyClause(Clause &&clause, const 
ClauseTy *node); + bool applyClause(const tomp::clause::AllocateT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::CollapseT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::PrivateT &clause, + bool applyClause(const tomp::clause::DefaultT &clause, const ClauseTy *); bool applyClause(const tomp::clause::FirstprivateT &clause, const ClauseTy *); + bool applyClause(const tomp::clause::IfT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::LastprivateT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::SharedT &clause, + bool applyClause(const tomp::clause::LinearT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::DefaultT &clause, + bool applyClause(const tomp::clause::NowaitT &clause, const ClauseTy *); bool - applyClause(const tomp::clause::ThreadLimitT &clause, + applyClause(const tomp::clause::OmpxAttributeT &clause, const ClauseTy *); + bool applyClause(const tomp::clause::OmpxBareT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::OrderT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::AllocateT &clause, + bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool applyClause(const tomp::clause::ReductionT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::IfT &clause, - const ClauseTy *); - bool applyClause(const tomp::clause::LinearT &clause, - const ClauseTy *); - bool applyClause(const tomp::clause::NowaitT &clause, + bool applyClause(const tomp::clause::SharedT &clause, const ClauseTy *); bool - applyClause(const tomp::clause::OmpxAttributeT &clause, + applyClause(const tomp::clause::ThreadLimitT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::OmpxBareT &clause, - const ClauseTy *); uint32_t version; llvm::omp::Directive construct; @@ -458,6 +458,34 @@ bool ConstructDecompositionT::applyClause(Specific &&specific, return false; } +// --- Specific clauses 
----------------------------------------------- + +// ALLOCATE +// [5.2:178:7-9] +// Directives: allocators, distribute, do, for, parallel, scope, sections, +// single, target, task, taskgroup, taskloop, teams +// +// [5.2:340:33-35] +// (33) The effect of the allocate clause is as if it is applied to all leaf +// constructs that permit the clause and to which a data-sharing attribute +// clause that may create a private copy of the same list item is applied. +template +bool ConstructDecompositionT::applyClause( + const tomp::clause::AllocateT &clause, + const ClauseTy *node) { + // This one needs to be applied at the end, once we know which clauses are + // assigned to which leaf constructs. + + // [5.2:340:33] + bool applied = applyIf(node, [&](const auto &leaf) { + return llvm::any_of(leaf.clauses, [&](const ClauseTy *n) { + return llvm::omp::isPrivatizingClause(n->id); + }); + }); + + return applied; +} + // COLLAPSE // [5.2:93:20-21] // Directives: distribute, do, for, loop, simd, taskloop @@ -483,19 +511,19 @@ bool ConstructDecompositionT::applyClause( return false; } -// PRIVATE -// [5.2:111:5-7] -// Directives: distribute, do, for, loop, parallel, scope, sections, simd, -// single, target, task, taskloop, teams +// DEFAULT +// [5.2:109:5-6] +// Directives: parallel, task, taskloop, teams // -// [5.2:340:1-2] -// (1) The effect of the 1 private clause is as if it is applied only to the -// innermost leaf construct that permits it. +// [5.2:340:31-32] +// (31) The effect of the shared, default, thread_limit, or order clause is as +// if it is applied to all leaf constructs that permit the clause. 
template bool ConstructDecompositionT::applyClause( - const tomp::clause::PrivateT &clause, + const tomp::clause::DefaultT &clause, const ClauseTy *node) { - return applyToInnermost(node); + // [5.2:340:31] + return applyToAll(node); } // FIRSTPRIVATE @@ -626,6 +654,44 @@ bool ConstructDecompositionT::applyClause( return applied; } +// IF +// [5.2:72:7-9] +// Directives: cancel, parallel, simd, target, target data, target enter data, +// target exit data, target update, task, taskloop +// +// [5.2:72:15-18] +// (15) For combined or composite constructs, the if clause only applies to the +// semantics of the construct named in the directive-name-modifier. +// (16) For a combined or composite construct, if no directive-name-modifier is +// specified then the if clause applies to all constituent constructs to which +// an if clause can apply. +template +bool ConstructDecompositionT::applyClause( + const tomp::clause::IfT &clause, + const ClauseTy *node) { + using DirectiveNameModifier = + typename clause::IfT::DirectiveNameModifier; + using IfExpression = typename clause::IfT::IfExpression; + auto &modifier = std::get>(clause.t); + + if (modifier) { + llvm::omp::Directive dirId = *modifier; + auto *unmodified = + makeClause(llvm::omp::Clause::OMPC_if, + tomp::clause::IfT{ + {/*DirectiveNameModifier=*/std::nullopt, + /*IfExpression=*/std::get(clause.t)}}); + + if (auto *hasDir = findDirective(dirId)) { + hasDir->clauses.push_back(unmodified); + return true; + } + return false; + } + + return applyToAll(node); +} + // LASTPRIVATE // [5.2:115:7-8] // Directives: distribute, do, for, loop, sections, simd, taskloop @@ -720,51 +786,94 @@ bool ConstructDecompositionT::applyClause( return applied; } -// SHARED -// [5.2:110:5-6] -// Directives: parallel, task, taskloop, teams +// LINEAR +// [5.2:118:1-2] +// Directives: declare simd, do, for, simd // -// [5.2:340:31-32] -// (31) The effect of the shared, default, thread_limit, or order clause is as -// if it is applied to all 
leaf constructs that permit the clause. +// [5.2:341:15-22] +// (15.1) The effect of the linear clause is as if it is applied to the +// innermost leaf construct. +// (15.2) Additionally, if the list item is not the iteration variable of a simd +// or worksharing-loop SIMD construct, the effect on the outer leaf constructs +// is as if the list item was specified in firstprivate and lastprivate clauses +// on the combined or composite construct, with the rules specified above +// applied. +// (19) If a list item of the linear clause is the iteration variable of a simd +// or worksharing-loop SIMD construct and it is not declared in the construct, +// the effect on the outer leaf constructs is as if the list item was specified +// in a lastprivate clause on the combined or composite construct with the rules +// specified above applied. template bool ConstructDecompositionT::applyClause( - const tomp::clause::SharedT &clause, + const tomp::clause::LinearT &clause, const ClauseTy *node) { - // [5.2:340:31] - return applyToAll(node); + // [5.2:341:15.1] + if (!applyToInnermost(node)) + return false; + + // [5.2:341:15.2], [5.2:341:19] + auto dirSimd = findDirective(llvm::omp::Directive::OMPD_simd); + std::optional iterVar = helper.getLoopIterVar(); + const auto &objects = std::get>(clause.t); + + // Lists of objects that will be used to construct "firstprivate" and + // "lastprivate" clauses. + tomp::ObjectListT first, last; + + for (const ObjectTy &object : objects) { + last.push_back(object); + if (!dirSimd || !iterVar || object.id() != iterVar->id()) + first.push_back(object); + } + + if (!first.empty()) { + auto *firstp = makeClause( + llvm::omp::Clause::OMPC_firstprivate, + tomp::clause::FirstprivateT{/*List=*/first}); + nodes.push_back(firstp); // Appending to the main clause list. 
+ } + if (!last.empty()) { + auto *lastp = + makeClause(llvm::omp::Clause::OMPC_lastprivate, + tomp::clause::LastprivateT{ + {/*LastprivateModifier=*/std::nullopt, /*List=*/last}}); + nodes.push_back(lastp); // Appending to the main clause list. + } + return true; } -// DEFAULT -// [5.2:109:5-6] -// Directives: parallel, task, taskloop, teams +// NOWAIT +// [5.2:308:11-13] +// Directives: dispatch, do, for, interop, scope, sections, single, target, +// target enter data, target exit data, target update, taskwait, workshare // -// [5.2:340:31-32] -// (31) The effect of the shared, default, thread_limit, or order clause is as -// if it is applied to all leaf constructs that permit the clause. +// [5.2:341:23] +// (23) The effect of the nowait clause is as if it is applied to the outermost +// leaf construct that permits it. template bool ConstructDecompositionT::applyClause( - const tomp::clause::DefaultT &clause, + const tomp::clause::NowaitT &clause, const ClauseTy *node) { - // [5.2:340:31] - return applyToAll(node); + return applyToOutermost(node); } -// THREAD_LIMIT -// [5.2:277:14-15] -// Directives: target, teams -// -// [5.2:340:31-32] -// (31) The effect of the shared, default, thread_limit, or order clause is as -// if it is applied to all leaf constructs that permit the clause. 
+// OMPX_ATTRIBUTE template bool ConstructDecompositionT::applyClause( - const tomp::clause::ThreadLimitT &clause, + const tomp::clause::OmpxAttributeT &clause, const ClauseTy *node) { - // [5.2:340:31] + // ERROR: no leaf that allows clause return applyToAll(node); } +// OMPX_BARE +template +bool ConstructDecompositionT::applyClause( + const tomp::clause::OmpxBareT &clause, + const ClauseTy *node) { + return applyToOutermost(node); +} + // ORDER // [5.2:234:3-4] // Directives: distribute, do, for, loop, simd @@ -780,30 +889,19 @@ bool ConstructDecompositionT::applyClause( return applyToAll(node); } -// ALLOCATE -// [5.2:178:7-9] -// Directives: allocators, distribute, do, for, parallel, scope, sections, -// single, target, task, taskgroup, taskloop, teams +// PRIVATE +// [5.2:111:5-7] +// Directives: distribute, do, for, loop, parallel, scope, sections, simd, +// single, target, task, taskloop, teams // -// [5.2:340:33-35] -// (33) The effect of the allocate clause is as if it is applied to all leaf -// constructs that permit the clause and to which a data-sharing attribute -// clause that may create a private copy of the same list item is applied. +// [5.2:340:1-2] +// (1) The effect of the 1 private clause is as if it is applied only to the +// innermost leaf construct that permits it. template bool ConstructDecompositionT::applyClause( - const tomp::clause::AllocateT &clause, + const tomp::clause::PrivateT &clause, const ClauseTy *node) { - // This one needs to be applied at the end, once we know which clauses are - // assigned to which leaf constructs. 
- - // [5.2:340:33] - bool applied = applyIf(node, [&](const auto &leaf) { - return llvm::any_of(leaf.clauses, [&](const ClauseTy *n) { - return llvm::omp::isPrivatizingClause(n->id); - }); - }); - - return applied; + return applyToInnermost(node); } // REDUCTION @@ -983,129 +1081,38 @@ bool ConstructDecompositionT::applyClause( return applied; } -// IF -// [5.2:72:7-9] -// Directives: cancel, parallel, simd, target, target data, target enter data, -// target exit data, target update, task, taskloop +// SHARED +// [5.2:110:5-6] +// Directives: parallel, task, taskloop, teams // -// [5.2:72:15-18] -// (15) For combined or composite constructs, the if clause only applies to the -// semantics of the construct named in the directive-name-modifier. -// (16) For a combined or composite construct, if no directive-name-modifier is -// specified then the if clause applies to all constituent constructs to which -// an if clause can apply. +// [5.2:340:31-32] +// (31) The effect of the shared, default, thread_limit, or order clause is as +// if it is applied to all leaf constructs that permit the clause. 
template bool ConstructDecompositionT::applyClause( - const tomp::clause::IfT &clause, + const tomp::clause::SharedT &clause, const ClauseTy *node) { - using DirectiveNameModifier = - typename clause::IfT::DirectiveNameModifier; - using IfExpression = typename clause::IfT::IfExpression; - auto &modifier = std::get>(clause.t); - - if (modifier) { - llvm::omp::Directive dirId = *modifier; - auto *unmodified = - makeClause(llvm::omp::Clause::OMPC_if, - tomp::clause::IfT{ - {/*DirectiveNameModifier=*/std::nullopt, - /*IfExpression=*/std::get(clause.t)}}); - - if (auto *hasDir = findDirective(dirId)) { - hasDir->clauses.push_back(unmodified); - return true; - } - return false; - } - + // [5.2:340:31] return applyToAll(node); } -// LINEAR -// [5.2:118:1-2] -// Directives: declare simd, do, for, simd -// -// [5.2:341:15-22] -// (15.1) The effect of the linear clause is as if it is applied to the -// innermost leaf construct. -// (15.2) Additionally, if the list item is not the iteration variable of a simd -// or worksharing-loop SIMD construct, the effect on the outer leaf constructs -// is as if the list item was specified in firstprivate and lastprivate clauses -// on the combined or composite construct, with the rules specified above -// applied. -// (19) If a list item of the linear clause is the iteration variable of a simd -// or worksharing-loop SIMD construct and it is not declared in the construct, -// the effect on the outer leaf constructs is as if the list item was specified -// in a lastprivate clause on the combined or composite construct with the rules -// specified above applied. 
-template -bool ConstructDecompositionT::applyClause( - const tomp::clause::LinearT &clause, - const ClauseTy *node) { - // [5.2:341:15.1] - if (!applyToInnermost(node)) - return false; - - // [5.2:341:15.2], [5.2:341:19] - auto dirSimd = findDirective(llvm::omp::Directive::OMPD_simd); - std::optional iterVar = helper.getLoopIterVar(); - const auto &objects = std::get>(clause.t); - - // Lists of objects that will be used to construct "firstprivate" and - // "lastprivate" clauses. - tomp::ObjectListT first, last; - - for (const ObjectTy &object : objects) { - last.push_back(object); - if (!dirSimd || !iterVar || object.id() != iterVar->id()) - first.push_back(object); - } - - if (!first.empty()) { - auto *firstp = makeClause( - llvm::omp::Clause::OMPC_firstprivate, - tomp::clause::FirstprivateT{/*List=*/first}); - nodes.push_back(firstp); // Appending to the main clause list. - } - if (!last.empty()) { - auto *lastp = - makeClause(llvm::omp::Clause::OMPC_lastprivate, - tomp::clause::LastprivateT{ - {/*LastprivateModifier=*/std::nullopt, /*List=*/last}}); - nodes.push_back(lastp); // Appending to the main clause list. - } - return true; -} - -// NOWAIT -// [5.2:308:11-13] -// Directives: dispatch, do, for, interop, scope, sections, single, target, -// target enter data, target exit data, target update, taskwait, workshare +// THREAD_LIMIT +// [5.2:277:14-15] +// Directives: target, teams // -// [5.2:341:23] -// (23) The effect of the nowait clause is as if it is applied to the outermost -// leaf construct that permits it. 
-template -bool ConstructDecompositionT::applyClause( - const tomp::clause::NowaitT &clause, - const ClauseTy *node) { - return applyToOutermost(node); -} - -template -bool ConstructDecompositionT::applyClause( - const tomp::clause::OmpxBareT &clause, - const ClauseTy *node) { - return applyToOutermost(node); -} - +// [5.2:340:31-32] +// (31) The effect of the shared, default, thread_limit, or order clause is as +// if it is applied to all leaf constructs that permit the clause. template bool ConstructDecompositionT::applyClause( - const tomp::clause::OmpxAttributeT &clause, + const tomp::clause::ThreadLimitT &clause, const ClauseTy *node) { + // [5.2:340:31] return applyToAll(node); } +// --- Splitting ------------------------------------------------------ + template bool ConstructDecompositionT::split() { bool success = true; diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 1c86d181e4375..8f6fb4da0c839 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -28,7 +28,6 @@ #include "llvm/Support/TypeSize.h" #include #include -#include #include #include #include diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h index 33eda5a4222f1..c25e2891d33d5 100644 --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -55,7 +55,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Compiler.h" -#include #include namespace llvm { diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index acb17a8090c51..4354551a2405b 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -47,7 +47,6 @@ #include "llvm/Support/TypeName.h" #include #include -#include #include #include #include diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 3b3c26fee02ec..1716492e73ff9 
100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -406,7 +406,7 @@ class LLVM_ABI MCAsmInfo { // Generated object files can use all ELF features supported by GNU ld of // this binutils version and later. INT_MAX means all features can be used, // regardless of GNU ld support. The default value is referenced by - // clang/Driver/Options.td. + // clang/Options/Options.td. std::pair BinutilsVersion = {2, 26}; /// Should we use the integrated assembler? diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h index 11d32fbb64702..c514b768637d1 100644 --- a/llvm/include/llvm/MC/MCParser/AsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h @@ -21,7 +21,6 @@ #include #include #include -#include namespace llvm { diff --git a/llvm/include/llvm/Option/Arg.h b/llvm/include/llvm/Option/Arg.h index b1e56b58da684..496373d28600f 100644 --- a/llvm/include/llvm/Option/Arg.h +++ b/llvm/include/llvm/Option/Arg.h @@ -51,7 +51,7 @@ class Arg { /// Was this argument used to affect compilation? /// /// This is used to generate an "argument unused" warning (without - /// clang::driver::options::TargetSpecific) or "unsupported option" error + /// clang::options::TargetSpecific) or "unsupported option" error /// (with TargetSpecific). 
mutable unsigned Claimed : 1; diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index bd7cd39ebb743..03777c7fcb45f 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -128,7 +128,6 @@ #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/LowerInvoke.h" #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/ProfileData/HashKeyMap.h b/llvm/include/llvm/ProfileData/HashKeyMap.h index b2f1bf222157b..fceb95143340f 100644 --- a/llvm/include/llvm/ProfileData/HashKeyMap.h +++ b/llvm/include/llvm/ProfileData/HashKeyMap.h @@ -16,7 +16,6 @@ #define LLVM_PROFILEDATA_HASHKEYMAP_H #include "llvm/ADT/Hashing.h" -#include #include namespace llvm { diff --git a/llvm/include/llvm/Support/CFGDiff.h b/llvm/include/llvm/Support/CFGDiff.h index 41004d755a124..88f4fe52d2019 100644 --- a/llvm/include/llvm/Support/CFGDiff.h +++ b/llvm/include/llvm/Support/CFGDiff.h @@ -21,7 +21,6 @@ #include "llvm/Support/type_traits.h" #include #include -#include // Two booleans are used to define orders in graphs: // InverseGraph defines when we need to reverse the whole graph and is as such diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index af542bae9f8c6..b6aae9f7928e3 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/llvm/include/llvm/Support/type_traits.h b/llvm/include/llvm/Support/type_traits.h index a96125c16f11b..d037132fa5bad 100644 --- a/llvm/include/llvm/Support/type_traits.h +++ b/llvm/include/llvm/Support/type_traits.h @@ -15,7 +15,6 @@ #include "llvm/Support/Compiler.h" #include -#include namespace llvm { diff --git a/llvm/include/llvm/Telemetry/Telemetry.h b/llvm/include/llvm/Telemetry/Telemetry.h index 
b20c7e2ec07d2..9b607f1a3a8fc 100644 --- a/llvm/include/llvm/Telemetry/Telemetry.h +++ b/llvm/include/llvm/Telemetry/Telemetry.h @@ -22,7 +22,6 @@ #include #include #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/XRay/YAMLXRayRecord.h b/llvm/include/llvm/XRay/YAMLXRayRecord.h index 6bf4f1d1ae082..8de569827586c 100644 --- a/llvm/include/llvm/XRay/YAMLXRayRecord.h +++ b/llvm/include/llvm/XRay/YAMLXRayRecord.h @@ -12,8 +12,6 @@ #ifndef LLVM_XRAY_YAMLXRAYRECORD_H #define LLVM_XRAY_YAMLXRAYRECORD_H -#include - #include "llvm/Support/YAMLTraits.h" #include "llvm/XRay/XRayRecord.h" diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp index 61b7b3fa9e2c4..7fe00c6e22c51 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include -#include using namespace llvm; using namespace llvm::PatternMatch; diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index f2ada27cac01d..a3cd157e6aa61 100644 --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -17,8 +17,6 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/MsgPackDocument.h" -#include - namespace llvm { namespace AMDGPU { namespace HSAMD { diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 46b5bb7908227..060582cec74d8 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 11ca48d9fe05c..bb55fc77fca0f 100644 --- 
a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -12,7 +12,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 98cdada3d8add..aff6a76879062 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -26,7 +26,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 8931daf9bb3de..7716dfc21d475 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -20,7 +20,6 @@ #include "llvm/IR/DebugInfoMetadata.h" #include #include -#include #include namespace llvm { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index d5dac417756f0..d304c7efe2a75 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -13,7 +13,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 3d71a9c7b0b9e..9dfb6af58323a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -32,7 +32,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp index 149f93fa69ccb..0260ee2e75aa5 100644 --- a/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -28,7 +28,6 @@ #include #include #include -#include using namespace llvm; diff --git 
a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index ea08365810a29..187bff78f236f 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -937,16 +937,6 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { if (CopyOperands) { Register RegSrc = CopyOperands->Source->getReg(); Register RegDef = CopyOperands->Destination->getReg(); - // It's possible that the previous transformations have resulted in a - // no-op register move (i.e. one where source and destination registers - // are the same and are not referring to a reserved register). If so, - // delete it. - if (RegSrc == RegDef && !MRI->isReserved(RegSrc)) { - MI.eraseFromParent(); - NumDeletes++; - Changed = true; - continue; - } if (!TRI->regsOverlap(RegDef, RegSrc)) { // Copy is now a candidate for deletion. diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 9ac3f7411af35..c40bd1c83f34a 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -31,7 +31,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 6b747f343c268..782898f430c19 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -69,7 +69,6 @@ #include #include #include -#include using namespace llvm; using namespace llvm::safestack; diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 2a8234a37a167..5fd5d6cce23df 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -49,7 +49,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include -#include using namespace llvm; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 
814b4b57a0b9b..01216552ed260 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include diff --git a/llvm/lib/DWARFCFIChecker/Registers.h b/llvm/lib/DWARFCFIChecker/Registers.h index a372c4c4345bd..915250de5aeae 100644 --- a/llvm/lib/DWARFCFIChecker/Registers.h +++ b/llvm/lib/DWARFCFIChecker/Registers.h @@ -17,7 +17,6 @@ #include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" -#include namespace llvm { diff --git a/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp index 1898fba004e88..c437c53b0481a 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp @@ -15,7 +15,6 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include -#include #include using namespace llvm; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 44fe0a990e8c0..545896ff493c6 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -31,7 +31,6 @@ #include #include #include -#include using namespace llvm; using namespace dwarf; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index a201fae84838c..db6170c784f80 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -17,7 +17,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 87675be1fc8e1..9fe74898170a5 100644 --- a/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -41,7 +41,6 @@ #include #include #include -#include #include #ifdef HAVE_FFI_CALL diff 
--git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt index 4669124ebe578..0b530fb1bc478 100644 --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_component_library(LLVMJITLink ELF_loongarch.cpp ELF_ppc64.cpp ELF_riscv.cpp + ELF_systemz.cpp ELF_x86.cpp ELF_x86_64.cpp @@ -46,6 +47,7 @@ add_llvm_component_library(LLVMJITLink loongarch.cpp ppc64.cpp riscv.cpp + systemz.cpp x86.cpp x86_64.cpp diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp index 87e451715811f..42f42eef00e5b 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp @@ -18,6 +18,7 @@ #include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h" #include "llvm/ExecutionEngine/JITLink/ELF_ppc64.h" #include "llvm/ExecutionEngine/JITLink/ELF_riscv.h" +#include "llvm/ExecutionEngine/JITLink/ELF_systemz.h" #include "llvm/ExecutionEngine/JITLink/ELF_x86.h" #include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h" #include "llvm/Object/ELF.h" @@ -98,6 +99,8 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer, return createLinkGraphFromELFObject_loongarch(ObjectBuffer, std::move(SSP)); case ELF::EM_RISCV: return createLinkGraphFromELFObject_riscv(ObjectBuffer, std::move(SSP)); + case ELF::EM_S390: + return createLinkGraphFromELFObject_systemz(ObjectBuffer, std::move(SSP)); case ELF::EM_X86_64: return createLinkGraphFromELFObject_x86_64(ObjectBuffer, std::move(SSP)); case ELF::EM_386: @@ -135,6 +138,9 @@ void link_ELF(std::unique_ptr G, case Triple::riscv64: link_ELF_riscv(std::move(G), std::move(Ctx)); return; + case Triple::systemz: + link_ELF_systemz(std::move(G), std::move(Ctx)); + return; case Triple::x86_64: link_ELF_x86_64(std::move(G), std::move(Ctx)); return; diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_systemz.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_systemz.cpp new file mode 
100644 index 0000000000000..29eeecceea766 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_systemz.cpp @@ -0,0 +1,424 @@ +//===----- ELF_systemz.cpp - JIT linker implementation for ELF/systemz ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// ELF/systemz jit-link implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h" +#include "llvm/ExecutionEngine/JITLink/systemz.h" +#include "llvm/Object/ELFObjectFile.h" + +#include "DefineExternalSectionStartAndEndSymbols.h" +#include "EHFrameSupportImpl.h" +#include "ELFLinkGraphBuilder.h" +#include "JITLinkGeneric.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm; +using namespace llvm::jitlink; + +namespace { + +constexpr StringRef ELFGOTSymbolName = "_GLOBAL_OFFSET_TABLE_"; + +Error buildTables_ELF_systemz(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); + systemz::GOTTableManager GOT; + systemz::PLTTableManager PLT(GOT); + visitExistingEdges(G, GOT, PLT); + return Error::success(); +} + +} // namespace + +namespace llvm { +namespace jitlink { +class ELFJITLinker_systemz : public JITLinker { + friend class JITLinker; + +public: + ELFJITLinker_systemz(std::unique_ptr Ctx, + std::unique_ptr G, + PassConfiguration PassConfig) + : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) { + if (shouldAddDefaultTargetPasses(getGraph().getTargetTriple())) + getPassConfig().PostAllocationPasses.push_back( + [this](LinkGraph &G) { return getOrCreateGOTSymbol(G); }); + } + +private: + Symbol *GOTSymbol = nullptr; + + Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const { + return systemz::applyFixup(G, B, E, 
GOTSymbol); + } + + Error getOrCreateGOTSymbol(LinkGraph &G) { + auto DefineExternalGOTSymbolIfPresent = + createDefineExternalSectionStartAndEndSymbolsPass( + [&](LinkGraph &LG, Symbol &Sym) -> SectionRangeSymbolDesc { + if (Sym.getName() != nullptr && + *Sym.getName() == ELFGOTSymbolName) + if (auto *GOTSection = G.findSectionByName( + systemz::GOTTableManager::getSectionName())) { + GOTSymbol = &Sym; + return {*GOTSection, true}; + } + return {}; + }); + + // Try to attach _GLOBAL_OFFSET_TABLE_ to the GOT if it's defined as an + // external. + if (auto Err = DefineExternalGOTSymbolIfPresent(G)) + return Err; + + // If we succeeded then we're done. + if (GOTSymbol) + return Error::success(); + + // Otherwise look for a GOT section: If it already has a start symbol we'll + // record it, otherwise we'll create our own. + // If there's a GOT section but we didn't find an external GOT symbol... + if (auto *GOTSection = + G.findSectionByName(systemz::GOTTableManager::getSectionName())) { + + // Check for an existing defined symbol. + for (auto *Sym : GOTSection->symbols()) + if (Sym->getName() != nullptr && *Sym->getName() == ELFGOTSymbolName) { + GOTSymbol = Sym; + return Error::success(); + } + + // If there's no defined symbol then create one. + SectionRange SR(*GOTSection); + if (SR.empty()) + GOTSymbol = + &G.addAbsoluteSymbol(ELFGOTSymbolName, orc::ExecutorAddr(), 0, + Linkage::Strong, Scope::Local, true); + else + GOTSymbol = + &G.addDefinedSymbol(*SR.getFirstBlock(), 0, ELFGOTSymbolName, 0, + Linkage::Strong, Scope::Local, false, true); + } + + // If we still haven't found a GOT symbol then double check the externals. + // We may have a GOT-relative reference but no GOT section, in which case + // we just need to point the GOT symbol at some address in this graph. 
+ if (!GOTSymbol) { + for (auto *Sym : G.external_symbols()) { + if (Sym->getName() != nullptr && *Sym->getName() == ELFGOTSymbolName) { + auto Blocks = G.blocks(); + if (!Blocks.empty()) { + G.makeAbsolute(*Sym, (*Blocks.begin())->getAddress()); + GOTSymbol = Sym; + break; + } + } + } + } + + return Error::success(); + } +}; + +class ELFLinkGraphBuilder_systemz + : public ELFLinkGraphBuilder { +private: + using ELFT = object::ELF64BE; + using Base = ELFLinkGraphBuilder; + using Base::G; // Use LinkGraph pointer from base class. + + Error addRelocations() override { + LLVM_DEBUG(dbgs() << "Processing relocations:\n"); + + using Base = ELFLinkGraphBuilder; + using Self = ELFLinkGraphBuilder_systemz; + for (const auto &RelSect : Base::Sections) { + if (RelSect.sh_type == ELF::SHT_REL) + // Validate the section to read relocation entries from. + return make_error("No SHT_REL in valid " + + G->getTargetTriple().getArchName() + + " ELF object files", + inconvertibleErrorCode()); + + if (Error Err = Base::forEachRelaRelocation(RelSect, this, + &Self::addSingleRelocation)) + return Err; + } + + return Error::success(); + } + + Error addSingleRelocation(const typename ELFT::Rela &Rel, + const typename ELFT::Shdr &FixupSect, + Block &BlockToFix) { + using support::big32_t; + using Base = ELFLinkGraphBuilder; + auto ELFReloc = Rel.getType(false); + + // No reloc. + if (LLVM_UNLIKELY(ELFReloc == ELF::R_390_NONE)) + return Error::success(); + + uint32_t SymbolIndex = Rel.getSymbol(false); + auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec); + if (!ObjSymbol) + return ObjSymbol.takeError(); + + Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex); + if (!GraphSymbol) + return make_error( + formatv("Could not find symbol at given index, did you add it to " + "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}", + SymbolIndex, (*ObjSymbol)->st_shndx, + Base::GraphSymbols.size()), + inconvertibleErrorCode()); + + // Validate the relocation kind. 
+ int64_t Addend = Rel.r_addend; + Edge::Kind Kind = Edge::Invalid; + + switch (ELFReloc) { + case ELF::R_390_PC64: { + Kind = systemz::Delta64; + break; + } + case ELF::R_390_PC32: { + Kind = systemz::Delta32; + break; + } + case ELF::R_390_PC16: { + Kind = systemz::Delta16; + break; + } + case ELF::R_390_PC32DBL: { + Kind = systemz::Delta32dbl; + break; + } + case ELF::R_390_PC24DBL: { + Kind = systemz::Delta24dbl; + break; + } + case ELF::R_390_PC16DBL: { + Kind = systemz::Delta16dbl; + break; + } + case ELF::R_390_PC12DBL: { + Kind = systemz::Delta12dbl; + break; + } + case ELF::R_390_64: { + Kind = systemz::Pointer64; + break; + } + case ELF::R_390_32: { + Kind = systemz::Pointer32; + break; + } + case ELF::R_390_20: { + Kind = systemz::Pointer20; + break; + } + case ELF::R_390_16: { + Kind = systemz::Pointer16; + break; + } + case ELF::R_390_12: { + Kind = systemz::Pointer12; + break; + } + case ELF::R_390_8: { + Kind = systemz::Pointer8; + break; + } + // Relocations targeting the PLT associated with the symbol. + case ELF::R_390_PLT64: { + Kind = systemz::DeltaPLT64; + break; + } + case ELF::R_390_PLT32: { + Kind = systemz::DeltaPLT32; + break; + } + case ELF::R_390_PLT32DBL: { + Kind = systemz::DeltaPLT32dbl; + break; + } + case ELF::R_390_PLT24DBL: { + Kind = systemz::DeltaPLT24dbl; + break; + } + case ELF::R_390_PLT16DBL: { + Kind = systemz::DeltaPLT16dbl; + break; + } + case ELF::R_390_PLT12DBL: { + Kind = systemz::DeltaPLT12dbl; + break; + } + case ELF::R_390_PLTOFF64: { + Kind = systemz::Delta64PLTFromGOT; + break; + } + case ELF::R_390_PLTOFF32: { + Kind = systemz::Delta32PLTFromGOT; + break; + } + case ELF::R_390_PLTOFF16: { + Kind = systemz::Delta16PLTFromGOT; + break; + } + // Relocations targeting the actual symbol (just relative to the GOT). 
+ case ELF::R_390_GOTOFF64: { + Kind = systemz::Delta64FromGOT; + break; + } + case ELF::R_390_GOTOFF: { + Kind = systemz::Delta32FromGOT; + break; + } + case ELF::R_390_GOTOFF16: { + Kind = systemz::Delta16FromGOT; + break; + } + // Relocations targeting the GOT entry associated with the symbol. + case ELF::R_390_GOT64: + case ELF::R_390_GOTPLT64: { + Kind = systemz::RequestGOTAndTransformToDelta64FromGOT; + break; + } + case ELF::R_390_GOT32: + case ELF::R_390_GOTPLT32: { + Kind = systemz::RequestGOTAndTransformToDelta32FromGOT; + break; + } + case ELF::R_390_GOT20: + case ELF::R_390_GOTPLT20: { + Kind = systemz::RequestGOTAndTransformToDelta20FromGOT; + break; + } + case ELF::R_390_GOT16: + case ELF::R_390_GOTPLT16: { + Kind = systemz::RequestGOTAndTransformToDelta16FromGOT; + break; + } + case ELF::R_390_GOT12: + case ELF::R_390_GOTPLT12: { + Kind = systemz::RequestGOTAndTransformToDelta12FromGOT; + break; + } + case ELF::R_390_GOTENT: + case ELF::R_390_GOTPLTENT: { + Kind = systemz::RequestGOTAndTransformToDelta32dbl; + break; + } + // R_390_GOTPC and R_390_GOTPCDBL don't create GOT entry, they don't even + // have symbol. 
+ case ELF::R_390_GOTPC: { + Kind = systemz::Delta32GOTBase; + break; + } + case ELF::R_390_GOTPCDBL: { + Kind = systemz::Delta32dblGOTBase; + break; + } + default: + return make_error( + "In " + G->getName() + ": Unsupported systemz relocation type " + + object::getELFRelocationTypeName(ELF::EM_S390, ELFReloc)); + } + auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; + Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress(); + Edge GE(Kind, Offset, *GraphSymbol, Addend); + LLVM_DEBUG({ + dbgs() << " "; + printEdge(dbgs(), BlockToFix, GE, systemz::getEdgeKindName(Kind)); + dbgs() << "\n"; + }); + + BlockToFix.addEdge(std::move(GE)); + + return Error::success(); + } + +public: + ELFLinkGraphBuilder_systemz(StringRef FileName, + const object::ELFFile &Obj, + std::shared_ptr SSP, + Triple TT, SubtargetFeatures Features) + : ELFLinkGraphBuilder(Obj, std::move(SSP), std::move(TT), + std::move(Features), FileName, + systemz::getEdgeKindName) {} +}; + +Expected> createLinkGraphFromELFObject_systemz( + MemoryBufferRef ObjectBuffer, std::shared_ptr SSP) { + LLVM_DEBUG({ + dbgs() << "Building jitlink graph for new input " + << ObjectBuffer.getBufferIdentifier() << "...\n"; + }); + + auto ELFObj = object::ObjectFile::createELFObjectFile(ObjectBuffer); + if (!ELFObj) + return ELFObj.takeError(); + + auto Features = (*ELFObj)->getFeatures(); + if (!Features) + return Features.takeError(); + + assert((*ELFObj)->getArch() == Triple::systemz && + "Only SystemZ is supported"); + + auto &ELFObjFile = cast>(**ELFObj); + return ELFLinkGraphBuilder_systemz( + (*ELFObj)->getFileName(), ELFObjFile.getELFFile(), std::move(SSP), + (*ELFObj)->makeTriple(), std::move(*Features)) + .buildGraph(); +} + +void link_ELF_systemz(std::unique_ptr G, + std::unique_ptr Ctx) { + PassConfiguration Config; + const Triple &TT = G->getTargetTriple(); + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add eh-frame passes. 
+ Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame")); + Config.PrePrunePasses.push_back( + EHFrameEdgeFixer(".eh_frame", G->getPointerSize(), systemz::Pointer32, + systemz::Pointer64, systemz::Delta32, systemz::Delta64, + systemz::NegDelta32)); + Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame")); + + // Add a mark-live pass. + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + Config.PrePrunePasses.push_back(std::move(MarkLive)); + else + Config.PrePrunePasses.push_back(markAllSymbolsLive); + + // Add an in-place GOT/Stubs build pass. + Config.PostPrunePasses.push_back(buildTables_ELF_systemz); + + // Resolve any external section start / end symbols. + Config.PostAllocationPasses.push_back( + createDefineExternalSectionStartAndEndSymbolsPass( + identifyELFSectionStartAndEndSymbols)); + + // TODO: Add GOT/Stubs optimizer pass. + // Config.PreFixupPasses.push_back(systemz::optimizeGOTAndStubAccesses); + } + + if (auto Err = Ctx->modifyPassConfig(*G, Config)) + return Ctx->notifyFailed(std::move(Err)); + + ELFJITLinker_systemz::link(std::move(Ctx), std::move(G), std::move(Config)); +} + +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 6e316f105715d..d98ded1ee4c32 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -16,6 +16,7 @@ #include "llvm/ExecutionEngine/JITLink/XCOFF.h" #include "llvm/ExecutionEngine/JITLink/aarch64.h" #include "llvm/ExecutionEngine/JITLink/loongarch.h" +#include "llvm/ExecutionEngine/JITLink/systemz.h" #include "llvm/ExecutionEngine/JITLink/x86.h" #include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "llvm/Support/raw_ostream.h" @@ -479,6 +480,8 @@ AnonymousPointerCreator getAnonymousPointerCreator(const Triple &TT) { case Triple::loongarch32: case Triple::loongarch64: return loongarch::createAnonymousPointer; + case Triple::systemz: + 
return systemz::createAnonymousPointer; default: return nullptr; } @@ -495,6 +498,8 @@ PointerJumpStubCreator getPointerJumpStubCreator(const Triple &TT) { case Triple::loongarch32: case Triple::loongarch64: return loongarch::createAnonymousPointerJumpStub; + case Triple::systemz: + return systemz::createAnonymousPointerJumpStub; default: return nullptr; } diff --git a/llvm/lib/ExecutionEngine/JITLink/systemz.cpp b/llvm/lib/ExecutionEngine/JITLink/systemz.cpp new file mode 100644 index 0000000000000..f6cc29fa6e6a1 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/systemz.cpp @@ -0,0 +1,114 @@ +//===---- systemz.cpp - Generic JITLink systemz edge kinds, utilities -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing systemz objects. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/systemz.h" + +#define DEBUG_TYPE "jitlink" + +namespace llvm { +namespace jitlink { +namespace systemz { + +const char NullPointerContent[8] = {0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00}; + +const char Pointer64JumpStubContent[8] = { + static_cast(0xC4u), + 0x18, + 0x00, + 0x00, + 0x00, + 0x00, // lgrl r1 + static_cast(0x07u), + static_cast(0xF1u), // BCR 15, 1 +}; + +const char *getEdgeKindName(Edge::Kind R) { + switch (R) { + case Pointer64: + return "Pointer64"; + case Pointer32: + return "Pointer32"; + case Pointer20: + return "Pointer20"; + case Pointer16: + return "Pointer16"; + case Pointer12: + return "Pointer12"; + case Pointer8: + return "Pointer8"; + case Delta64: + return "Delta64"; + case Delta32: + return "Delta32"; + case Delta16: + return "Delta16"; + case Delta32dbl: + return "Delta32dbl"; + case Delta24dbl: + return "Delta24dbl"; + case Delta16dbl: + return "Delta16dbl"; + case Delta12dbl: + return "Delta12dbl"; + case NegDelta64: + return "NegDelta64"; + case NegDelta32: + return "NegDelta32"; + case DeltaPLT32dbl: + return "DeltaPLT32dbl"; + case DeltaPLT24dbl: + return "DeltaPLT24dbl"; + case DeltaPLT16dbl: + return "DeltaPLT16dbl"; + case DeltaPLT12dbl: + return "DeltaPLT12dbl"; + case DeltaPLT64: + return "DeltaPLT64"; + case DeltaPLT32: + return "DeltaPLT32"; + case Delta64FromGOT: + return "Delta64FromGOT"; + case Delta32FromGOT: + return "Delta32FromGOT"; + case Delta16FromGOT: + return "Delta16FromGOT"; + case Delta64PLTFromGOT: + return "Delta64PLTFromGOT"; + case Delta32PLTFromGOT: + return "Delta32PLTFromGOT"; + case Delta16PLTFromGOT: + return "Delta16PLTFromGOT"; + case Delta32GOTBase: + return "Delta32GOTBase"; + case Delta32dblGOTBase: + return "Delta32dblGOTBase"; + case RequestGOTAndTransformToDelta64FromGOT: + return "RequestGOTAndTransformToDelta64FromGOT"; + case 
RequestGOTAndTransformToDelta32FromGOT: + return "RequestGOTAndTransformToDelta32FromGOT"; + case RequestGOTAndTransformToDelta20FromGOT: + return "RequestGOTAndTransformToDelta20FromGOT"; + case RequestGOTAndTransformToDelta16FromGOT: + return "RequestGOTAndTransformToDelta16FromGOT"; + case RequestGOTAndTransformToDelta12FromGOT: + return "RequestGOTAndTransformToDelta12FromGOT"; + case RequestGOTAndTransformToDelta32dbl: + return "RequestGOTAndTransformToDelta32dbl"; + default: + return getGenericEdgeKindName(static_cast(R)); + } +} + +} // namespace systemz +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index ba3c039503720..d35438d5e1c9c 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -40,7 +40,6 @@ #include #include #include -#include using namespace llvm; using namespace llvm::at; diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 1f5e78732761d..fd8add4e07832 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include -#include using namespace llvm; diff --git a/llvm/lib/IR/PassRegistry.cpp b/llvm/lib/IR/PassRegistry.cpp index 94afbb52d70e3..a91bb563af4bb 100644 --- a/llvm/lib/IR/PassRegistry.cpp +++ b/llvm/lib/IR/PassRegistry.cpp @@ -17,7 +17,6 @@ #include "llvm/PassInfo.h" #include #include -#include using namespace llvm; diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 8e60577bf8fb4..e3ece87778a04 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -28,7 +28,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/RISCVTargetParser.h" #include -#include using namespace llvm; diff --git a/llvm/lib/IR/ValueSymbolTable.cpp b/llvm/lib/IR/ValueSymbolTable.cpp index cd1cee16e7473..3bf52f6ef024e 100644 --- a/llvm/lib/IR/ValueSymbolTable.cpp +++ b/llvm/lib/IR/ValueSymbolTable.cpp 
@@ -23,7 +23,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include -#include using namespace llvm; diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp index 5dd79946d8779..2a796fb1cfe11 100644 --- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp @@ -21,7 +21,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index 04e12e56c4262..6e685c60a406e 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -20,7 +20,6 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Support/SMLoc.h" #include -#include using namespace llvm; diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 911d92c51b59b..c3faab89bb258 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/SMLoc.h" #include #include -#include using namespace llvm; diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index 39542bfbdd8e3..a95ccf83a2636 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -33,7 +33,6 @@ #include #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp index 9f9d63f212c88..e2fc32d90f15e 100644 --- a/llvm/lib/Option/ArgList.cpp +++ b/llvm/lib/Option/ArgList.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 0450b2fd172ef..20398b5f582f4 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/TableGen/DetailedRecordsBackend.cpp 
b/llvm/lib/TableGen/DetailedRecordsBackend.cpp index 1ed64356b7c62..b1152bf680c69 100644 --- a/llvm/lib/TableGen/DetailedRecordsBackend.cpp +++ b/llvm/lib/TableGen/DetailedRecordsBackend.cpp @@ -22,7 +22,6 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include -#include using namespace llvm; diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp index a67bd42aa16e0..d87bb522c99e8 100644 --- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp +++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp @@ -46,7 +46,6 @@ #include "llvm/Transforms/Utils/MemoryTaggingSupport.h" #include #include -#include using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index dad94b83aa84f..8838a94a639eb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -52,7 +52,7 @@ struct ArgDescriptor { } static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) { - return ArgDescriptor(Arg.Reg, Mask, Arg.IsStack, Arg.IsSet); + return ArgDescriptor(Arg.Reg.id(), Mask, Arg.IsStack, Arg.IsSet); } bool isSet() const { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index e3f3abae01648..dd3120f05ce26 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1199,8 +1199,8 @@ void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { // Given a wide tuple \p Reg check if it will overflow 256 registers. // \returns \p Reg on success or NoRegister otherwise. 
-static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC, - const MCRegisterInfo &MRI) { +static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, + const MCRegisterInfo &MRI) { unsigned NumRegs = RC.getSizeInBits() / 32; MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0); if (!Sub0) @@ -1214,7 +1214,7 @@ static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC, assert(BaseReg && "Only vector registers expected"); - return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister; + return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister(); } // Note that before gfx10, the MIMG encoding provided no information about @@ -1456,9 +1456,8 @@ MCOperand AMDGPUDisassembler::errOperand(unsigned V, return MCOperand(); } -inline -MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const { - return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI)); +inline MCOperand AMDGPUDisassembler::createRegOperand(MCRegister Reg) const { + return MCOperand::createReg(AMDGPU::getMCReg(Reg, STI)); } inline diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index d103d79fdabb9..ab130dbb08ff9 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -69,7 +69,7 @@ class AMDGPUDisassembler : public MCDisassembler { const char* getRegClassName(unsigned RegClassID) const; - MCOperand createRegOperand(unsigned int RegId) const; + MCOperand createRegOperand(MCRegister Reg) const; MCOperand createRegOperand(unsigned RegClassID, unsigned Val) const; MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const; MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 703ec0a4befa5..207c1da56ca59 100644 --- 
a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -336,7 +336,7 @@ void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI, // \returns a low 256 vgpr representing a high vgpr \p Reg [v256..v1023] or // \p Reg itself otherwise. -static MCPhysReg getRegForPrinting(MCPhysReg Reg, const MCRegisterInfo &MRI) { +static MCRegister getRegForPrinting(MCRegister Reg, const MCRegisterInfo &MRI) { unsigned Enc = MRI.getEncodingValue(Reg); unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK; if (Idx < 0x100) @@ -355,10 +355,10 @@ static MCPhysReg getRegForPrinting(MCPhysReg Reg, const MCRegisterInfo &MRI) { } // Restore MSBs of a VGPR above 255 from the MCInstrAnalysis. -static MCPhysReg getRegFromMIA(MCPhysReg Reg, unsigned OpNo, - const MCInstrDesc &Desc, - const MCRegisterInfo &MRI, - const AMDGPUMCInstrAnalysis &MIA) { +static MCRegister getRegFromMIA(MCRegister Reg, unsigned OpNo, + const MCInstrDesc &Desc, + const MCRegisterInfo &MRI, + const AMDGPUMCInstrAnalysis &MIA) { unsigned VgprMSBs = MIA.getVgprMSBs(); if (!VgprMSBs) return Reg; @@ -403,10 +403,10 @@ void AMDGPUInstPrinter::printRegOperand(MCRegister Reg, raw_ostream &O, } #endif - unsigned PrintReg = getRegForPrinting(Reg, MRI); + MCRegister PrintReg = getRegForPrinting(Reg, MRI); O << getRegisterName(PrintReg); - if (PrintReg != Reg.id()) + if (PrintReg != Reg) O << " /*" << getRegisterName(Reg) << "*/"; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2c689cc8bcbef..ae86d1382382f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17389,12 +17389,14 @@ void SITargetLowering::AddMemOpInit(MachineInstr &MI) const { // Abandon attempt if the dst size isn't large enough // - this is in fact an error but this is picked up elsewhere and // reported correctly. 
- uint32_t DstSize = - TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; + const TargetRegisterClass *DstRC = TII->getRegClass(MI.getDesc(), DstIdx); + + uint32_t DstSize = TRI.getRegSizeInBits(*DstRC) / 32; if (DstSize < InitIdx) return; } else if (TII->isMUBUF(MI) && AMDGPU::getMUBUFTfe(MI.getOpcode())) { - InitIdx = TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; + const TargetRegisterClass *DstRC = TII->getRegClass(MI.getDesc(), DstIdx); + InitIdx = TRI.getRegSizeInBits(*DstRC) / 32; } else { return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index b3c351a5ba6ce..259442fe774bf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7686,6 +7686,8 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Opcode = Inst.getOpcode(); unsigned NewOpcode = getVALUOp(Inst); + const DebugLoc &DL = Inst.getDebugLoc(); + // Handle some special cases switch (Opcode) { default: @@ -7923,7 +7925,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, return; case AMDGPU::S_UADDO_PSEUDO: case AMDGPU::S_USUBO_PSEUDO: { - const DebugLoc &DL = Inst.getDebugLoc(); MachineOperand &Dest0 = Inst.getOperand(0); MachineOperand &Dest1 = Inst.getOperand(1); MachineOperand &Src0 = Inst.getOperand(2); @@ -7943,12 +7944,37 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, legalizeOperands(*NewInstr, MDT); MRI.replaceRegWith(Dest0.getReg(), DestReg); - addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI, - Worklist); + addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist); Inst.eraseFromParent(); } return; + case AMDGPU::S_LSHL1_ADD_U32: + case AMDGPU::S_LSHL2_ADD_U32: + case AMDGPU::S_LSHL3_ADD_U32: + case AMDGPU::S_LSHL4_ADD_U32: { + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src0 = Inst.getOperand(1); + MachineOperand &Src1 = Inst.getOperand(2); 
+ unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1 + : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2 + : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3 + : 4); + + const TargetRegisterClass *NewRC = + RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())); + Register DestReg = MRI.createVirtualRegister(NewRC); + MachineInstr *NewInstr = + BuildMI(*MBB, &Inst, DL, get(AMDGPU::V_LSHL_ADD_U32_e64), DestReg) + .add(Src0) + .addImm(ShiftAmt) + .add(Src1); + legalizeOperands(*NewInstr, MDT); + MRI.replaceRegWith(Dest.getReg(), DestReg); + addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist); + Inst.eraseFromParent(); + } + return; case AMDGPU::S_CSELECT_B32: case AMDGPU::S_CSELECT_B64: lowerSelect(Worklist, Inst, MDT); @@ -8045,7 +8071,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, return; } case AMDGPU::S_CVT_HI_F32_F16: { - const DebugLoc &DL = Inst.getDebugLoc(); Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); if (ST.useRealTrue16Insts()) { @@ -8075,7 +8100,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, } case AMDGPU::S_MINIMUM_F32: case AMDGPU::S_MAXIMUM_F32: { - const DebugLoc &DL = Inst.getDebugLoc(); Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst) .addImm(0) // src0_modifiers @@ -8093,7 +8117,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, } case AMDGPU::S_MINIMUM_F16: case AMDGPU::S_MAXIMUM_F16: { - const DebugLoc &DL = Inst.getDebugLoc(); Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts() ? 
&AMDGPU::VGPR_16RegClass : &AMDGPU::VGPR_32RegClass); @@ -8117,7 +8140,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, case AMDGPU::V_S_RCP_F16_e64: case AMDGPU::V_S_RSQ_F16_e64: case AMDGPU::V_S_SQRT_F16_e64: { - const DebugLoc &DL = Inst.getDebugLoc(); Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts() ? &AMDGPU::VGPR_16RegClass : &AMDGPU::VGPR_32RegClass); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index b7f63eceb5d5c..0bde5d3fd2f26 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -776,11 +776,7 @@ def xnor : PatFrag < foreach I = 1-4 in { def shl#I#_add : PatFrag < (ops node:$src0, node:$src1), - (add (shl_oneuse $src0, (i32 I)), $src1)> { - // FIXME: Poor substitute for disabling pattern in SelectionDAG - let PredicateCode = [{return false;}]; - let GISelPredicateCode = [{return true;}]; -} + (add (shl_oneuse $src0, (i32 I)), $src1)>; } multiclass SIAtomicM0Glue2 isBottomOfStack()) { // Convert to a swizzled stack address by scaling by the wave size. // In an entry function/kernel the offset is already swizzled. 
- bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum)); + bool IsSALU = isSGPRClass(TII->getRegClass(MI->getDesc(), FIOperandNum)); bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr); const TargetRegisterClass *RC = IsSALU && !LiveSCC diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 3e1b058726dbb..37bf2d2463ae2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -897,7 +897,7 @@ unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { } std::optional InstInfo::getInvalidCompOperandIndex( - std::function GetRegIdx, + std::function GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR, bool VOPD3) const { @@ -914,12 +914,13 @@ std::optional InstInfo::getInvalidCompOperandIndex( BaseX = X; if (!BaseY) BaseY = Y; - if ((BaseX & BanksMask) == (BaseY & BanksMask)) + if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask)) return true; if (BaseX != X /* This is 64-bit register */ && - ((BaseX + 1) & BanksMask) == (BaseY & BanksMask)) + ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask)) return true; - if (BaseY != Y && (BaseX & BanksMask) == ((BaseY + 1) & BanksMask)) + if (BaseY != Y && + (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask)) return true; // If both are 64-bit bank conflict will be detected yet while checking @@ -968,7 +969,7 @@ std::optional InstInfo::getInvalidCompOperandIndex( // if the operand is not a register or not a VGPR. InstInfo::RegIndices InstInfo::getRegIndices(unsigned CompIdx, - std::function GetRegIdx, + std::function GetRegIdx, bool VOPD3) const { assert(CompIdx < COMPONENTS_NUM); @@ -983,7 +984,7 @@ InstInfo::getRegIndices(unsigned CompIdx, Comp.hasRegSrcOperand(CompSrcIdx) ? 
GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3)) - : 0; + : MCRegister(); } return RegIndices; } @@ -2697,8 +2698,8 @@ MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) { MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG } -bool isInlineValue(unsigned Reg) { - switch (Reg) { +bool isInlineValue(MCRegister Reg) { + switch (Reg.id()) { case AMDGPU::SRC_SHARED_BASE_LO: case AMDGPU::SRC_SHARED_BASE: case AMDGPU::SRC_SHARED_LIMIT_LO: @@ -3361,7 +3362,7 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, : getGfx9BufferFormatInfo(Format); } -const MCRegisterClass *getVGPRPhysRegClass(MCPhysReg Reg, +const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI) { const unsigned VGPRClasses[] = { AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID, @@ -3382,22 +3383,22 @@ const MCRegisterClass *getVGPRPhysRegClass(MCPhysReg Reg, return nullptr; } -unsigned getVGPREncodingMSBs(MCPhysReg Reg, const MCRegisterInfo &MRI) { +unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI) { unsigned Enc = MRI.getEncodingValue(Reg); unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK; return Idx >> 8; } -MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs, - const MCRegisterInfo &MRI) { +MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, + const MCRegisterInfo &MRI) { unsigned Enc = MRI.getEncodingValue(Reg); unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK; if (Idx >= 0x100) - return AMDGPU::NoRegister; + return MCRegister(); const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI); if (!RC) - return AMDGPU::NoRegister; + return MCRegister(); Idx |= MSBs << 8; if (RC->getID() == AMDGPU::VGPR_16RegClassID) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 5e3195b36fe4c..9f65f9326a73e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -909,7 +909,7 
@@ class InstInfo { const ComponentInfo CompInfo[COMPONENTS_NUM]; public: - using RegIndices = std::array; + using RegIndices = std::array; InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) : CompInfo{OpX, OpY} {} @@ -932,9 +932,10 @@ class InstInfo { // even though it violates requirement to be from different banks. // If \p VOPD3 is set to true both dst registers allowed to be either odd // or even and instruction may have real src2 as opposed to tied accumulator. - bool hasInvalidOperand(std::function GetRegIdx, - const MCRegisterInfo &MRI, bool SkipSrc = false, - bool AllowSameVGPR = false, bool VOPD3 = false) const { + bool + hasInvalidOperand(std::function GetRegIdx, + const MCRegisterInfo &MRI, bool SkipSrc = false, + bool AllowSameVGPR = false, bool VOPD3 = false) const { return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR, VOPD3) .has_value(); @@ -949,14 +950,14 @@ class InstInfo { // If \p VOPD3 is set to true both dst registers allowed to be either odd // or even and instruction may have real src2 as opposed to tied accumulator. std::optional getInvalidCompOperandIndex( - std::function GetRegIdx, + std::function GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc = false, bool AllowSameVGPR = false, bool VOPD3 = false) const; private: RegIndices getRegIndices(unsigned ComponentIdx, - std::function GetRegIdx, + std::function GetRegIdx, bool VOPD3) const; }; @@ -1599,7 +1600,7 @@ LLVM_READNONE MCRegister mc2PseudoReg(MCRegister Reg); LLVM_READNONE -bool isInlineValue(unsigned Reg); +bool isInlineValue(MCRegister Reg); /// Is this an AMDGPU specific source operand? These include registers, /// inline constants, literals and mandatory literals (KImm). @@ -1798,16 +1799,16 @@ bool isIntrinsicAlwaysUniform(unsigned IntrID); /// \returns a register class for the physical register \p Reg if it is a VGPR /// or nullptr otherwise. 
-const MCRegisterClass *getVGPRPhysRegClass(MCPhysReg Reg, +const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI); /// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the /// physical register \p Reg. -unsigned getVGPREncodingMSBs(MCPhysReg Reg, const MCRegisterInfo &MRI); +unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI); /// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set. -MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs, - const MCRegisterInfo &MRI); +MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, + const MCRegisterInfo &MRI); // Returns a table for the opcode with a given \p Desc to map the VGPR MSB // set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2 diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index f43ec73db7e1f..80494d993f425 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 72eb3d0f8b7f4..b6897608a952c 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/Support/ErrorHandling.h" -#include namespace llvm { diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp index e81bb4745faff..98798275e7979 100644 --- a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp +++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp @@ -476,7 +476,7 @@ bool CSKYFrameLowering::spillCalleeSavedRegisters( // Insert the spill to the stack frame. 
MCRegister Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI, + TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, Register()); } @@ -498,8 +498,7 @@ bool CSKYFrameLowering::restoreCalleeSavedRegisters( for (auto &CS : reverse(CSI)) { MCRegister Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register()); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, Register()); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index 34a7de8d8ae96..3ab09902be3aa 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -24,8 +24,9 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "CSKYGenInstrInfo.inc" -CSKYInstrInfo::CSKYInstrInfo(const CSKYSubtarget &STI) - : CSKYGenInstrInfo(STI, RI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), +CSKYInstrInfo::CSKYInstrInfo(const CSKYSubtarget &STI, + const CSKYRegisterInfo &TRI) + : CSKYGenInstrInfo(STI, TRI, CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), STI(STI) { v2sf = STI.hasFPUv2SingleFloat(); v2df = STI.hasFPUv2DoubleFloat(); @@ -393,7 +394,6 @@ void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL; @@ -434,10 +434,12 @@ void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addMemOperand(MMO); } -void CSKYInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, 
MachineInstr::MIFlag Flags) const { +void CSKYInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h index 6451c0af14fc0..d1cd0395f3b95 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h @@ -21,6 +21,7 @@ namespace llvm { +class CSKYRegisterInfo; class CSKYSubtarget; class CSKYInstrInfo : public CSKYGenInstrInfo { @@ -33,7 +34,7 @@ class CSKYInstrInfo : public CSKYGenInstrInfo { const CSKYSubtarget &STI; public: - explicit CSKYInstrInfo(const CSKYSubtarget &STI); + CSKYInstrInfo(const CSKYSubtarget &STI, const CSKYRegisterInfo &RI); Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; @@ -42,14 +43,12 @@ class CSKYInstrInfo : public CSKYGenInstrInfo { void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp index a554d1c0e739b..94e412ec81725 100644 --- a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp +++ 
b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp @@ -92,7 +92,7 @@ CSKYSubtarget::CSKYSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, const TargetMachine &TM) : CSKYGenSubtargetInfo(TT, CPU, TuneCPU, FS), FrameLowering(initializeSubtargetDependencies(TT, CPU, TuneCPU, FS)), - InstrInfo(*this), RegInfo(), TLInfo(TM, *this) { + InstrInfo(*this, RegInfo), TLInfo(TM, *this) { TSInfo = std::make_unique(); } diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h b/llvm/lib/Target/CSKY/CSKYSubtarget.h index a3f2ddcb7165b..f5ad26a20d8a5 100644 --- a/llvm/lib/Target/CSKY/CSKYSubtarget.h +++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h @@ -30,8 +30,8 @@ class CSKYSubtarget : public CSKYGenSubtargetInfo { virtual void anchor(); CSKYFrameLowering FrameLowering; - CSKYInstrInfo InstrInfo; CSKYRegisterInfo RegInfo; + CSKYInstrInfo InstrInfo; CSKYTargetLowering TLInfo; std::unique_ptr TSInfo; diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp index eca5ac140f3c3..bae3484eee1cb 100644 --- a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -24,7 +24,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp index 74e5abe2599c7..c6fffde84af58 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -43,7 +43,6 @@ #include #include #include -#include #define DEBUG_TYPE "hexmux" diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index 9c81e9638f8e2..5344ed8446efc 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #define DEBUG_TYPE "gen-pred" diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp 
index 54f5608d460af..f375b25e4ceb8 100644 --- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include using namespace llvm; using namespace rdf; diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp index 3b1d3bd89680b..4cab5da7b1caf 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -26,7 +26,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include using namespace llvm; using namespace rdf; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index e1b37fd88f44d..9bbb3aad89c44 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -89,7 +89,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h index 98c873824bc1d..a2b75e4a42e76 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h @@ -11,7 +11,6 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCFixup.h" -#include #undef RISCV diff --git a/llvm/lib/Target/SPIRV/SPIRVIRMapping.h b/llvm/lib/Target/SPIRV/SPIRVIRMapping.h index a329fd5ed9d29..c99d603d340ea 100644 --- a/llvm/lib/Target/SPIRV/SPIRVIRMapping.h +++ b/llvm/lib/Target/SPIRV/SPIRVIRMapping.h @@ -22,8 +22,6 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include - namespace llvm { namespace SPIRV { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5bce539c45341..fa3dce256046f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53412,7 +53412,7 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const 
SDLoc &DL, return SDValue(); // SrcVal must be a matching normal load further up the chain. - auto *Ld = dyn_cast(SrcVal); + auto *Ld = dyn_cast(peekThroughBitcasts(SrcVal)); if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple() || Ld->getBasePtr() != St->getBasePtr() || Ld->getOffset() != St->getOffset() || diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1b748b7355716..70564973816b1 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3161,6 +3161,12 @@ multiclass avx512_mask_setop_w { defm KSET0 : avx512_mask_setop_w; defm KSET1 : avx512_mask_setop_w; +// 8-bit mask set operations for AVX512DQ +let Predicates = [HasDQI] in { + defm KSET0B : avx512_mask_setop; + defm KSET1B : avx512_mask_setop; +} + // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. let Predicates = [HasAVX512] in { def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; @@ -3173,6 +3179,34 @@ let Predicates = [HasAVX512] in { def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; } +// With AVX512DQ, use 8-bit operations for 8-bit masks to avoid setting upper +// bits +let Predicates = [HasDQI] in { + def : Pat<(v8i1 immAllZerosV), (KSET0B)>; + def : Pat<(v8i1 immAllOnesV), (KSET1B)>; +} + +// Optimize bitconvert of all-ones constants to use kxnor instructions +let Predicates = [HasDQI] in { + def : Pat<(v8i1(bitconvert(i8 255))), (KSET1B)>; + def : Pat<(v16i1(bitconvert(i16 255))), (COPY_TO_REGCLASS(KSET1B), VK16)>; +} +let Predicates = [HasBWI] in { + def : Pat<(v32i1(bitconvert(i32 -1))), (KSET1D)>; + def : Pat<(v64i1(bitconvert(i64 -1))), (KSET1Q)>; +} +// Submask patterns: lower N bits set in larger mask registers +let Predicates = [HasBWI, HasDQI] in { + // v32i1 submasks + def : Pat<(v32i1(bitconvert(i32 255))), (COPY_TO_REGCLASS(KSET1B), VK32)>; + def : Pat<(v32i1(bitconvert(i32 65535))), (COPY_TO_REGCLASS(KSET1W), VK32)>; + // v64i1 submasks + def : 
Pat<(v64i1(bitconvert(i64 255))), (COPY_TO_REGCLASS(KSET1B), VK64)>; + def : Pat<(v64i1(bitconvert(i64 65535))), (COPY_TO_REGCLASS(KSET1W), VK64)>; + def : Pat<(v64i1(bitconvert(i64 4294967295))), (COPY_TO_REGCLASS(KSET1D), + VK64)>; +} + // Patterns for kmask insert_subvector/extract_subvector to/from index=0 multiclass operation_subvector_mask_lowering { diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 61d9608160197..cb0208a4a5f32 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -788,9 +788,11 @@ bool X86InstrInfo::isReMaterializableImpl( case X86::FsFLD0SS: case X86::FsFLD0SH: case X86::FsFLD0F128: + case X86::KSET0B: case X86::KSET0D: case X86::KSET0Q: case X86::KSET0W: + case X86::KSET1B: case X86::KSET1D: case X86::KSET1Q: case X86::KSET1W: @@ -6352,12 +6354,16 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // registers, since it is not usable as a write mask. // FIXME: A more advanced approach would be to choose the best input mask // register based on context. 
+ case X86::KSET0B: + return Expand2AddrKreg(MIB, get(X86::KXORBkk), X86::K0); case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWkk), X86::K0); case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDkk), X86::K0); case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQkk), X86::K0); + case X86::KSET1B: + return Expand2AddrKreg(MIB, get(X86::KXNORBkk), X86::K0); case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWkk), X86::K0); case X86::KSET1D: diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp index d0d897e6784d3..829a32eb37118 100644 --- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -54,7 +54,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index 1099aa335e4c5..0c8b9043fcbbb 100644 --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -65,7 +65,6 @@ #include #include #include -#include using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 0bc5964132b72..20cfbe460a351 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -63,7 +63,6 @@ #include #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 94dfd3a974923..5b94897f4342f 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -66,7 +66,6 @@ #include "llvm/Transforms/Utils/ValueMapper.h" #include #include -#include #include namespace llvm { diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index ca90bb65f5708..1e614bd29ee6e 100644 --- 
a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -53,7 +53,6 @@ #include "llvm/Transforms/Utils/ValueMapper.h" #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 3840b464e6c2c..72858e1265d86 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1787,6 +1787,12 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { return getOperand(I + 1)->isDefinedOutsideLoopRegions(); } + bool areAllOperandsInvariant() const { + return all_of(operands(), [](VPValue *Op) { + return Op->isDefinedOutsideLoopRegions(); + }); + } + public: VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands) : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP), diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ba145ffa0b681..9a5591feb3d05 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2523,32 +2523,51 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. - assert( - any_of(operands(), - [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); }) && - "Expected at least one loop-variant operand"); - - // If the GEP has at least one loop-varying operand, we are sure to - // produce a vector of pointers unless VF is scalar. - // The pointer operand of the new GEP. If it's loop-invariant, we - // won't broadcast it. - auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant()); - - // Collect all the indices for the new GEP. If any index is - // loop-invariant, we won't broadcast it. 
- SmallVector Indices; - for (unsigned I = 1, E = getNumOperands(); I < E; I++) { - VPValue *Operand = getOperand(I); - Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1))); - } - - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, - // but it should be a vector, otherwise. - auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices, - "", getGEPNoWrapFlags()); - assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && - "NewGEP is not a pointer vector"); - State.set(this, NewGEP); + if (areAllOperandsInvariant()) { + // If we are vectorizing, but the GEP has only loop-invariant operands, + // the GEP we build (by only using vector-typed operands for + // loop-varying values) would be a scalar pointer. Thus, to ensure we + // produce a vector of pointers, we need to either arbitrarily pick an + // operand to broadcast, or broadcast a clone of the original GEP. + // Here, we broadcast a clone of the original. + // + // TODO: If at some point we decide to scalarize instructions having + // loop-invariant operands, this special case will no longer be + // required. We would add the scalarization decision to + // collectLoopScalars() and teach getVectorValue() to broadcast + // the lane-zero scalar value. + SmallVector Ops; + for (unsigned I = 0, E = getNumOperands(); I != E; I++) + Ops.push_back(State.get(getOperand(I), VPLane(0))); + + auto *NewGEP = + State.Builder.CreateGEP(getSourceElementType(), Ops[0], drop_begin(Ops), + "", getGEPNoWrapFlags()); + Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP); + State.set(this, Splat); + } else { + // If the GEP has at least one loop-varying operand, we are sure to + // produce a vector of pointers unless VF is scalar. + // The pointer operand of the new GEP. If it's loop-invariant, we + // won't broadcast it. + auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant()); + + // Collect all the indices for the new GEP. 
If any index is + // loop-invariant, we won't broadcast it. + SmallVector Indices; + for (unsigned I = 1, E = getNumOperands(); I < E; I++) { + VPValue *Operand = getOperand(I); + Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1))); + } + + // Create the new GEP. Note that this GEP may be a scalar if VF == 1, + // but it should be a vector, otherwise. + auto *NewGEP = State.Builder.CreateGEP(getSourceElementType(), Ptr, Indices, + "", getGEPNoWrapFlags()); + assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && + "NewGEP is not a pointer vector"); + State.set(this, NewGEP); + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 10afd006c90c9..1b9bc4cc45163 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1387,8 +1387,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { - if (!isa(&R)) + if (!isa(&R)) continue; auto *RepR = dyn_cast(&R); if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) diff --git a/llvm/test/CodeGen/AArch64/pr166870.ll b/llvm/test/CodeGen/AArch64/pr166870.ll new file mode 100644 index 0000000000000..dc23f51987635 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr166870.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -O3 < %s -mtriple=aarch64 | FileCheck %s + +; The seemingly redundant mov where src_reg == dst_reg shouldn't be removed, +; because it has the effect of zeroing the upper bits in x8. 
+ +define i32 @ham(i32 %arg, i1 %arg1, i1 %arg2, ptr %arg3) nounwind { +; CHECK-LABEL: ham: +; CHECK: // %bb.0: // %bb +; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: tbnz w1, #0, .LBB0_3 +; CHECK-NEXT: // %bb.1: // %bb4 +; CHECK-NEXT: tbnz w2, #0, .LBB0_3 +; CHECK-NEXT: // %bb.2: // %bb5 +; CHECK-NEXT: mov x19, x3 +; CHECK-NEXT: mov w21, w1 +; CHECK-NEXT: mov w20, w0 +; CHECK-NEXT: bl zot +; CHECK-NEXT: tbz w21, #0, .LBB0_4 +; CHECK-NEXT: .LBB0_3: // %bb6 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: mov w8, w20 +; CHECK-NEXT: mov w20, wzr +; CHECK-NEXT: mov w8, w8 +; CHECK-NEXT: mov w21, w8 +; CHECK-NEXT: .LBB0_5: // %bb7 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: strb w20, [x19] +; CHECK-NEXT: cbnz x21, .LBB0_5 +; CHECK-NEXT: // %bb.6: // %bb8 +; CHECK-NEXT: // in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: bl quux +; CHECK-NEXT: b .LBB0_5 +bb: + br i1 %arg1, label %bb6, label %bb4 + +bb4: + %load = load ptr, ptr null, align 8 + br i1 %arg2, label %bb6, label %bb5 + +bb5: + %call = call i32 @zot() #0 + %zext = zext i32 %arg to i64 + br i1 %arg1, label %bb6, label %bb7 + +bb6: + ret i32 0 + +bb7: + store i8 0, ptr %arg3, align 1 + %icmp = icmp eq i64 %zext, 0 + br i1 %icmp, label %bb8, label %bb7 + +bb8: + call void @quux() + br label %bb7 +} + +declare i32 @zot() + +declare void @quux() + +attributes #0 = { returns_twice } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-fp-nosave.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-fp-nosave.ll index 881cf977eab90..16ba79403cef2 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-fp-nosave.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-fp-nosave.ll @@ -52,16 +52,14 @@ define amdgpu_cs_chain void @test_alloca_var_uniform(i32 inreg 
%count) { ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s32, 16 -; GFX12-NEXT: s_lshl_b32 s0, s0, 2 +; GFX12-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX12-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_add_co_i32 s0, s0, 15 -; GFX12-NEXT: s_mov_b32 s1, s32 -; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_and_b32 s0, s0, -16 -; GFX12-NEXT: scratch_store_b32 off, v0, s1 +; GFX12-NEXT: s_mov_b32 s1, s32 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_lshl_b32 s0, s0, 5 +; GFX12-NEXT: scratch_store_b32 off, v0, s1 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_add_co_i32 s32, s1, s0 ; GFX12-NEXT: s_endpgm @@ -70,8 +68,7 @@ define amdgpu_cs_chain void @test_alloca_var_uniform(i32 inreg %count) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s32, 16 -; GFX942-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-NEXT: s_add_i32 s0, s0, 15 +; GFX942-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX942-NEXT: s_and_b32 s0, s0, -16 ; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: s_lshl_b32 s0, s0, 6 @@ -211,18 +208,16 @@ define amdgpu_cs_chain void @test_alloca_and_call_var_uniform(i32 inreg %count) ; GFX12-NEXT: s_add_co_u32 s2, s2, foo@gotpcrel32@lo+12 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_add_co_ci_u32 s3, s3, foo@gotpcrel32@hi+24 -; GFX12-NEXT: s_lshl_b32 s0, s0, 2 +; GFX12-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 -; GFX12-NEXT: s_add_co_i32 s0, s0, 15 ; GFX12-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_and_b32 s0, s0, -16 ; GFX12-NEXT: s_mov_b32 s1, s32 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_lshl_b32 s0, s0, 5 -; GFX12-NEXT: scratch_store_b32 off, v0, s1 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_add_co_i32 s32, s1, s0 +; GFX12-NEXT: scratch_store_b32 off, v0, s1 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_swappc_b64 s[30:31], s[2:3] @@ -232,8 +227,7 @@ 
define amdgpu_cs_chain void @test_alloca_and_call_var_uniform(i32 inreg %count) ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s32, 16 -; GFX942-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-NEXT: s_add_i32 s0, s0, 15 +; GFX942-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX942-NEXT: s_and_b32 s0, s0, -16 ; GFX942-NEXT: s_lshl_b32 s2, s0, 6 ; GFX942-NEXT: s_getpc_b64 s[0:1] @@ -395,15 +389,13 @@ define amdgpu_cs_chain void @test_call_and_alloca_var_uniform(i32 inreg %count) ; GFX12-NEXT: s_add_co_u32 s2, s2, foo@gotpcrel32@lo+12 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_add_co_ci_u32 s3, s3, foo@gotpcrel32@hi+24 -; GFX12-NEXT: s_lshl_b32 s0, s0, 2 +; GFX12-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 -; GFX12-NEXT: s_add_co_i32 s0, s0, 15 -; GFX12-NEXT: s_mov_b32 s4, s32 -; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_and_b32 s0, s0, -16 -; GFX12-NEXT: v_mov_b32_e32 v40, 0 +; GFX12-NEXT: s_mov_b32 s4, s32 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_lshl_b32 s0, s0, 5 +; GFX12-NEXT: v_mov_b32_e32 v40, 0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_add_co_i32 s32, s4, s0 ; GFX12-NEXT: s_wait_kmcnt 0x0 @@ -416,8 +408,7 @@ define amdgpu_cs_chain void @test_call_and_alloca_var_uniform(i32 inreg %count) ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s32, 16 -; GFX942-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-NEXT: s_add_i32 s0, s0, 15 +; GFX942-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX942-NEXT: s_and_b32 s0, s0, -16 ; GFX942-NEXT: s_lshl_b32 s2, s0, 6 ; GFX942-NEXT: s_getpc_b64 s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll index 9a4040a25419a..49977a4c64784 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll @@ -265,8 +265,7 @@ define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(ptr addrspac 
; GFX9-NEXT: v_mov_b32_e32 v0, 7 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_sub_i32 s2, s2, s3 -; GFX9-NEXT: s_lshl_b32 s2, s2, 2 -; GFX9-NEXT: s_add_i32 s0, s0, s2 +; GFX9-NEXT: s_lshl2_add_u32 s0, s2, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, s1 ; GFX9-NEXT: ds_cmpst_rtn_b32 v0, v1, v0, v2 offset:16 @@ -282,9 +281,8 @@ define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(ptr addrspac ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: v_mov_b32_e32 v2, s1 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s0, s0, s2 +; GFX11-NEXT: s_lshl2_add_u32 s0, s2, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll index ed767aeaf112f..6dad2258781b3 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -13,8 +13,7 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform(i32 %n) { ; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s32 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2 -; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s4, s4, 15 ; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 6 @@ -53,12 +52,11 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform(i32 %n) { ; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-SDAG-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | 
instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 ; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 ; GFX11-SDAG-NEXT: s_endpgm ; @@ -88,13 +86,12 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_over_aligned(i ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0 -; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17 ; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x2000 +; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17 ; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0 ; GFX9-SDAG-NEXT: s_add_i32 s5, s32, 0x1fff ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2 -; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s4, s4, 15 ; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0xffffe000 ; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 10 @@ -137,12 +134,11 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_over_aligned(i ; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-SDAG-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 ; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 ; GFX11-SDAG-NEXT: s_endpgm ; @@ -178,8 +174,7 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_under_aligned( ; 
GFX9-SDAG-NEXT: s_movk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s32 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2 -; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s4, s4, 15 ; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 22 ; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 6 @@ -218,12 +213,11 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_under_aligned( ; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-SDAG-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 ; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 ; GFX11-SDAG-NEXT: s_endpgm ; @@ -367,26 +361,26 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent() { define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent_over_aligned() { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned: ; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x2000 ; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17 -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0x1fff +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX9-SDAG-NEXT: s_and_b32 s6, s4, 0xffffe000 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec -; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s7, 0 ; GFX9-SDAG-NEXT: s_mov_b32 s33, 0 -; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x2000 ; GFX9-SDAG-NEXT: .LBB4_1: ; =>This Inner Loop Header: 
Depth=1 -; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] -; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7 -; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8 +; GFX9-SDAG-NEXT: s_ff1_i32_b64 s8, s[4:5] +; GFX9-SDAG-NEXT: v_readlane_b32 s9, v0, s8 +; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s8 +; GFX9-SDAG-NEXT: s_max_u32 s7, s7, s9 ; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 ; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9-SDAG-NEXT: ; %bb.2: -; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0x1fff -; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 0xffffe000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s6, 6, v0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s7, 6, v0 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x1bc ; GFX9-SDAG-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen @@ -609,8 +603,7 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_multiple_allocas(i32 % ; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x2000 ; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB6_4 ; GFX9-SDAG-NEXT: ; %bb.1: ; %bb.0 -; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 2 -; GFX9-SDAG-NEXT: s_add_i32 s5, s5, 15 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s5, s5, 15 ; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff ; GFX9-SDAG-NEXT: s_and_b32 s5, s5, -16 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 @@ -639,8 +632,7 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_multiple_allocas(i32 % ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: .LBB6_4: ; %bb.1 -; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2 -; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s4, s4, 15 ; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 6 @@ -719,20 +711,17 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_multiple_allocas(i32 % ; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB6_4 ; GFX11-SDAG-NEXT: ; 
%bb.1: ; %bb.0 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-SDAG-NEXT: s_lshl_b32 s1, s1, 2 +; GFX11-SDAG-NEXT: s_lshl2_add_u32 s1, s1, 15 ; GFX11-SDAG-NEXT: s_add_i32 s3, s32, 0x7ff -; GFX11-SDAG-NEXT: s_add_i32 s1, s1, 15 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_and_b32 s4, s1, -16 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_and_b32 s1, s3, 0xfffff800 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_lshl_b32 s3, s4, 5 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s3 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: s_mov_b32 s3, exec_lo +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: .LBB6_2: ; =>This Inner Loop Header: Depth=1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s4, s3 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: v_readlane_b32 s5, v0, s4 @@ -750,18 +739,16 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_multiple_allocas(i32 % ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 ; GFX11-SDAG-NEXT: .LBB6_4: ; %bb.1 -; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-SDAG-NEXT: s_lshl2_add_u32 s0, s0, 15 ; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 -; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 +; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 -; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 +; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 ; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s33 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 
-; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 ; GFX11-SDAG-NEXT: s_endpgm ; @@ -866,9 +853,8 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_cbranch_execnz .LBB7_5 ; GFX9-SDAG-NEXT: .LBB7_4: ; %bb.0 -; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 2 ; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0xfff -; GFX9-SDAG-NEXT: s_add_i32 s5, s5, 15 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s5, s5, 15 ; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 0xfffff000 ; GFX9-SDAG-NEXT: s_and_b32 s5, s5, -16 ; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 6 @@ -964,16 +950,15 @@ define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 ; GFX11-SDAG-NEXT: s_cbranch_execnz .LBB7_5 ; GFX11-SDAG-NEXT: .LBB7_4: ; %bb.0 -; GFX11-SDAG-NEXT: s_lshl_b32 s0, s1, 2 +; GFX11-SDAG-NEXT: s_lshl2_add_u32 s1, s1, 15 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2 -; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 -; GFX11-SDAG-NEXT: s_add_i32 s1, s32, 0x7ff -; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 -; GFX11-SDAG-NEXT: s_and_b32 s1, s1, 0xfffff800 -; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 -; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc +; GFX11-SDAG-NEXT: s_add_i32 s0, s32, 0x7ff +; GFX11-SDAG-NEXT: s_and_b32 s1, s1, -16 +; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 0xfffff800 +; GFX11-SDAG-NEXT: s_lshl_b32 s1, s1, 5 +; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 +; GFX11-SDAG-NEXT: s_add_i32 s32, s0, s1 ; GFX11-SDAG-NEXT: .LBB7_5: ; %bb.2 ; GFX11-SDAG-NEXT: s_endpgm ; GFX11-SDAG-NEXT: .LBB7_6: @@ -1173,35 +1158,35 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s10, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0x1fc0 -; GFX9-SDAG-NEXT: s_mov_b32 s10, s34 +; GFX9-SDAG-NEXT: s_mov_b32 s11, s34 ; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 +; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0x1fff ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX9-SDAG-NEXT: s_and_b32 s6, s4, 0xffffe000 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec -; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 +; GFX9-SDAG-NEXT: s_mov_b32 s7, 0 ; GFX9-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] -; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7 -; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8 +; GFX9-SDAG-NEXT: s_ff1_i32_b64 s8, s[4:5] +; GFX9-SDAG-NEXT: v_readlane_b32 s9, v0, s8 +; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s8 +; GFX9-SDAG-NEXT: s_max_u32 s7, s7, s9 ; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 ; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB9_1 ; GFX9-SDAG-NEXT: ; %bb.2: -; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0x1fff -; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 0xffffe000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s6, 6, v0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s7, 6, v0 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 10 ; GFX9-SDAG-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_mov_b32 s32, s34 -; GFX9-SDAG-NEXT: s_mov_b32 s34, s10 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s9 +; GFX9-SDAG-NEXT: s_mov_b32 s34, s11 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s10 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: @@ -1241,37 +1226,36 @@ define void 
@test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s5, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 0x7f -; GFX11-SDAG-NEXT: s_mov_b32 s5, s34 +; GFX11-SDAG-NEXT: s_mov_b32 s6, s34 ; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 ; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo -; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x100 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo +; GFX11-SDAG-NEXT: s_add_i32 s0, s32, 0xfff +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 +; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 0xfffff000 ; GFX11-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 +; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 -; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 -; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 -; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 +; GFX11-SDAG-NEXT: v_readlane_b32 s4, v0, s3 +; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3 +; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4 +; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0 ; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB9_1 ; GFX11-SDAG-NEXT: ; %bb.2: -; GFX11-SDAG-NEXT: s_add_i32 s1, s32, 0xfff +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s0 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 10 -; GFX11-SDAG-NEXT: s_and_b32 s1, s1, 0xfffff000 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 -; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_2) ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 +; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s0 dlc +; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s34, s5 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 +; GFX11-SDAG-NEXT: s_mov_b32 s34, s6 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s5 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: @@ -1861,20 +1845,20 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-SDAG-NEXT: s_cbranch_execz .LBB14_6 ; GFX9-SDAG-NEXT: ; %bb.1: ; %bb.0 +; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff ; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 +; GFX9-SDAG-NEXT: s_and_b32 s9, s6, 0xfffff000 ; GFX9-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 ; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec ; GFX9-SDAG-NEXT: s_mov_b32 s10, 0 ; GFX9-SDAG-NEXT: .LBB14_2: ; =>This Inner Loop Header: Depth=1 -; GFX9-SDAG-NEXT: s_ff1_i32_b64 s9, s[6:7] -; GFX9-SDAG-NEXT: v_readlane_b32 s11, v1, s9 -; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s9 -; GFX9-SDAG-NEXT: s_max_u32 s10, s10, s11 +; GFX9-SDAG-NEXT: s_ff1_i32_b64 s11, s[6:7] +; GFX9-SDAG-NEXT: v_readlane_b32 s12, v1, s11 +; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s11 +; GFX9-SDAG-NEXT: s_max_u32 s10, s10, s12 ; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0 ; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB14_2 ; GFX9-SDAG-NEXT: ; %bb.3: -; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff -; GFX9-SDAG-NEXT: s_and_b32 s9, s6, 0xfffff000 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s9 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s10, 6, v1 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 @@ -1905,7 +1889,6 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: .LBB14_6: ; %bb.1 ; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 2 ; 
GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1 @@ -1923,7 +1906,8 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s4 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2 +; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_mov_b32 s32, s34 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s14 @@ -2027,27 +2011,26 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: s_cbranch_execz .LBB14_6 ; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.0 ; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 -; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo +; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff +; GFX11-SDAG-NEXT: s_mov_b32 s4, exec_lo +; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800 ; GFX11-SDAG-NEXT: s_mov_b32 s3, 0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 ; GFX11-SDAG-NEXT: .LBB14_2: ; =>This Inner Loop Header: Depth=1 -; GFX11-SDAG-NEXT: s_ctz_i32_b32 s4, s2 +; GFX11-SDAG-NEXT: s_ctz_i32_b32 s5, s4 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: v_readlane_b32 s5, v1, s4 -; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s4 -; GFX11-SDAG-NEXT: s_max_u32 s3, s3, s5 -; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-SDAG-NEXT: v_readlane_b32 s6, v1, s5 +; GFX11-SDAG-NEXT: s_bitset0_b32 s4, s5 +; GFX11-SDAG-NEXT: s_max_u32 s3, s3, s6 +; GFX11-SDAG-NEXT: s_cmp_lg_u32 s4, 0 ; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_2 ; GFX11-SDAG-NEXT: ; %bb.3: ; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v31 -; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff -; GFX11-SDAG-NEXT: s_mov_b32 s4, exec_lo -; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800 
-; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX11-SDAG-NEXT: v_lshl_add_u32 v2, s3, 5, s2 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 +; GFX11-SDAG-NEXT: s_mov_b32 s4, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s3, 0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v2 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0x1ff0, v1 @@ -2070,28 +2053,27 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1 ; GFX11-SDAG-NEXT: .LBB14_6: ; %bb.1 ; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v0, 2, 15 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: v_readlane_b32 s3, v1, s2 +; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 ; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 ; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 ; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 ; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_7 ; GFX11-SDAG-NEXT: ; %bb.8: ; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 1 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s0, 5, s1 -; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s33 dlc +; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 +; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s33 dlc ; GFX11-SDAG-NEXT: 
s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc +; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s8 ; GFX11-SDAG-NEXT: s_mov_b32 s33, s7 @@ -2200,9 +2182,9 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_control_flow: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: s_mov_b32 s11, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s12, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0xfc0 -; GFX9-SDAG-NEXT: s_mov_b32 s12, s34 +; GFX9-SDAG-NEXT: s_mov_b32 s13, s34 ; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 ; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 @@ -2212,24 +2194,24 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; GFX9-SDAG-NEXT: s_cbranch_execz .LBB15_4 ; GFX9-SDAG-NEXT: ; %bb.1: ; %bb.1 -; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2 -; GFX9-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 +; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15 +; GFX9-SDAG-NEXT: s_and_b32 s9, s6, 0xfffff000 +; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec ; GFX9-SDAG-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1 -; GFX9-SDAG-NEXT: s_ff1_i32_b64 s9, s[6:7] -; GFX9-SDAG-NEXT: v_readlane_b32 s10, v1, s9 -; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s9 -; GFX9-SDAG-NEXT: s_max_u32 s8, s8, s10 +; GFX9-SDAG-NEXT: s_ff1_i32_b64 s10, s[6:7] +; GFX9-SDAG-NEXT: v_readlane_b32 s11, v0, s10 +; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s10 +; GFX9-SDAG-NEXT: s_max_u32 s8, s8, s11 ; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0 ; 
GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB15_2 ; GFX9-SDAG-NEXT: ; %bb.3: -; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff -; GFX9-SDAG-NEXT: s_and_b32 s6, s6, 0xfffff000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-SDAG-NEXT: v_lshl_add_u32 v2, s8, 6, v1 -; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v2 -; GFX9-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s9 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s8, 6, v0 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 2 +; GFX9-SDAG-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: ; implicit-def: $vgpr31 ; GFX9-SDAG-NEXT: .LBB15_4: ; %Flow @@ -2259,8 +2241,8 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: .LBB15_8: ; %bb.2 ; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-SDAG-NEXT: s_mov_b32 s32, s34 -; GFX9-SDAG-NEXT: s_mov_b32 s34, s12 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s11 +; GFX9-SDAG-NEXT: s_mov_b32 s34, s13 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s12 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_control_flow: @@ -2332,9 +2314,9 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_control_flow: ; GFX11-SDAG: ; %bb.0: ; %entry ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_mov_b32 s5, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s6, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 63 -; GFX11-SDAG-NEXT: s_mov_b32 s6, s34 +; GFX11-SDAG-NEXT: s_mov_b32 s7, s34 ; GFX11-SDAG-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 ; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 @@ -2344,28 +2326,28 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_4 ; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.1 -; GFX11-SDAG-NEXT: 
v_lshl_add_u32 v1, v1, 2, 15 -; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2 -; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15 +; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff +; GFX11-SDAG-NEXT: s_mov_b32 s3, exec_lo +; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1 -; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2 +; GFX11-SDAG-NEXT: s_ctz_i32_b32 s4, s3 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: v_readlane_b32 s4, v1, s3 -; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3 -; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4 -; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-SDAG-NEXT: v_readlane_b32 s5, v0, s4 +; GFX11-SDAG-NEXT: s_bitset0_b32 s3, s4 +; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s5 +; GFX11-SDAG-NEXT: s_cmp_lg_u32 s3, 0 ; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB15_2 ; GFX11-SDAG-NEXT: ; %bb.3: -; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s2 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 2 ; GFX11-SDAG-NEXT: ; implicit-def: $vgpr31 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s1, 5, s2 -; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s2 dlc +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 +; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s2 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1 ; GFX11-SDAG-NEXT: .LBB15_4: ; %Flow ; GFX11-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_8 @@ -2394,8 +2376,8 @@ define void 
@test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: .LBB15_8: ; %bb.2 ; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s34, s6 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s5 +; GFX11-SDAG-NEXT: s_mov_b32 s34, s7 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s6 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_control_flow: diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll index b0e6752386285..e01cb79382c05 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -524,7 +524,7 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-SDAG-NEXT: s_lshl1_add_u32 s0, s0, 0 ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 @@ -695,7 +695,7 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-SDAG-NEXT: s_lshl1_add_u32 s0, s0, 0 ; GFX942-SDAG-NEXT: v_mad_u32_u24 v0, v0, 2, s0 ; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) @@ -875,7 +875,7 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-SDAG-NEXT: s_lshl1_add_u32 s0, s0, 0 ; GFX942-SDAG-NEXT: v_mad_u32_u24 v0, v0, 4, s0 ; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) @@ 
-1054,7 +1054,7 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-SDAG-NEXT: s_lshl2_add_u32 s0, s0, 0 ; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 @@ -1225,7 +1225,7 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-SDAG-NEXT: s_lshl2_add_u32 s0, s0, 0 ; GFX942-SDAG-NEXT: v_mad_u32_u24 v0, v0, 2, s0 ; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) @@ -1405,7 +1405,7 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 ; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-SDAG-NEXT: s_lshl2_add_u32 s0, s0, 0 ; GFX942-SDAG-NEXT: v_mad_u32_u24 v0, v0, 4, s0 ; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 ; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index fc8883924dfbc..870b679a84d11 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -857,13 +857,13 @@ define void @store_load_vindex_foo(i32 %idx) { ; GFX9-LABEL: store_load_vindex_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v1, s32 -; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: s_mov_b32 s0, s32 +; GFX9-NEXT: v_lshl_add_u32 v1, v0, 2, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: 
scratch_store_dword v2, v3, off +; GFX9-NEXT: scratch_store_dword v1, v2, off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -915,13 +915,13 @@ define void @store_load_vindex_foo(i32 %idx) { ; GFX9-PAL-LABEL: store_load_vindex_foo: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s32 -; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 -; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-PAL-NEXT: s_mov_b32 s0, s32 +; GFX9-PAL-NEXT: v_lshl_add_u32 v1, v0, 2, s0 +; GFX9-PAL-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off +; GFX9-PAL-NEXT: scratch_store_dword v1, v2, off ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] @@ -929,8 +929,8 @@ define void @store_load_vindex_foo(i32 %idx) { ; GFX942-LABEL: store_load_vindex_foo: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v1, s32 -; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, v1 +; GFX942-NEXT: s_mov_b32 s0, s32 +; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, s0 ; GFX942-NEXT: v_mov_b32_e32 v2, 15 ; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 @@ -2146,16 +2146,16 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX9-LABEL: store_load_vindex_small_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s1, s32, 0x100 ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s32, 0x100 
-; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: s_mov_b32 s0, s1 +; GFX9-NEXT: v_lshl_add_u32 v1, v0, 2, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: scratch_store_dword v2, v3, off +; GFX9-NEXT: scratch_store_dword v1, v2, off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -2214,16 +2214,16 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX9-PAL-LABEL: store_load_vindex_small_offset_foo: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-PAL-NEXT: s_add_i32 s1, s32, 0x100 ; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x100 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 -; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-PAL-NEXT: s_mov_b32 s0, s1 +; GFX9-PAL-NEXT: v_lshl_add_u32 v1, v0, 2, s0 +; GFX9-PAL-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off +; GFX9-PAL-NEXT: scratch_store_dword v1, v2, off ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] @@ -2231,11 +2231,11 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX942-LABEL: store_load_vindex_small_offset_foo: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_add_i32 s1, s32, 0x100 ; GFX942-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: s_add_i32 
s0, s32, 0x100 -; GFX942-NEXT: v_mov_b32_e32 v1, s0 -; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, v1 +; GFX942-NEXT: s_mov_b32 s0, s1 +; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, s0 ; GFX942-NEXT: v_mov_b32_e32 v2, 15 ; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 @@ -3447,16 +3447,16 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX9-LABEL: store_load_vindex_large_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-NEXT: s_mov_b32 s0, s1 +; GFX9-NEXT: v_lshl_add_u32 v1, v0, 2, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: scratch_store_dword v2, v3, off +; GFX9-NEXT: scratch_store_dword v1, v2, off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -3516,16 +3516,16 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX9-PAL-LABEL: store_load_vindex_large_offset_foo: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 -; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 +; GFX9-PAL-NEXT: s_mov_b32 s0, s1 +; GFX9-PAL-NEXT: v_lshl_add_u32 v1, v0, 2, s0 +; GFX9-PAL-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-PAL-NEXT: 
scratch_store_dword v2, v3, off +; GFX9-PAL-NEXT: scratch_store_dword v1, v2, off ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] @@ -3533,11 +3533,11 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX942-LABEL: store_load_vindex_large_offset_foo: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX942-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX942-NEXT: v_mov_b32_e32 v1, s0 -; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, v1 +; GFX942-NEXT: s_mov_b32 s0, s1 +; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, s0 ; GFX942-NEXT: v_mov_b32_e32 v2, 15 ; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 @@ -3940,12 +3940,12 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s8, s13 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: s_mov_b32 s1, 0 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX9-NEXT: scratch_store_dword v0, v1, off offset:1024 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, v0, off offset:1024 glc @@ -4001,15 +4001,15 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX9-PAL-NEXT: s_getpc_b64 s[12:13] ; GFX9-PAL-NEXT: s_mov_b32 s12, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[12:13], s[12:13], 0x0 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-PAL-NEXT: 
s_mov_b32 s1, 0 ; GFX9-PAL-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s13, s13, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s12, s11 ; GFX9-PAL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off offset:1024 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off offset:1024 glc @@ -4020,11 +4020,11 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 ; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: s_mov_b32 s1, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX942-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX942-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll index 60ac0b943faf4..29163c111fc5e 100644 --- a/llvm/test/CodeGen/AMDGPU/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll @@ -8894,6 +8894,501 @@ define double @v_test_fmed3_r_i_i_f64_minimumnum_maximumnum(double %a) { ret double %med } +define float @v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum(float %a) { +; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SI-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: 
v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fc00000 +; SI-GISEL-NEXT: v_max_f32_e32 v2, 2.0, v0 +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; SI-GISEL-NEXT: v_min_f32_e32 v2, 4.0, v0 +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; VI-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; VI-GISEL-NEXT: v_max_f32_e32 v1, 2.0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-GISEL-NEXT: v_min_f32_e32 v1, 4.0, v0 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GFX9-GISEL-NEXT: v_max_f32_e32 v1, 2.0, v0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX9-GISEL-NEXT: 
v_cmp_o_f32_e32 vcc, 2.0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-GISEL-NEXT: v_min_f32_e32 v1, 4.0, v0 +; GFX9-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximum_minimum: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_max_f32_e32 v1, 2.0, v0 +; GFX11-GISEL-NEXT: v_cmp_o_f32_e32 vcc_lo, 2.0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_min_f32_e32 v1, 4.0, v0 +; GFX11-GISEL-NEXT: v_cmp_o_f32_e32 vcc_lo, 4.0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a.add = fadd nnan float %a, 1.0 + %max = call float @llvm.maximum.f32(float %a.add, float 2.0) + %med = call float @llvm.minimum.f32(float %max, float 4.0) + ret float %med +} + +define <2 x half> @v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum(<2 x half> %a) { +; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: 
v_add_f32_e32 v1, 1.0, v1 +; SI-SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SI-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; SI-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; SI-GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SI-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_max_f32_e32 v3, 2.0, v0 +; SI-GISEL-NEXT: v_max_f32_e32 v4, 2.0, v1 +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-GISEL-NEXT: v_min_f32_e32 v3, 4.0, v0 +; SI-GISEL-NEXT: v_min_f32_e32 v4, 4.0, v1 +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00 +; VI-SDAG-NEXT: v_add_f16_e32 v1, 1.0, v0 +; VI-SDAG-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-SDAG-NEXT: v_max_f16_e32 v0, 
2.0, v0 +; VI-SDAG-NEXT: v_max_f16_e32 v1, 2.0, v1 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4400 +; VI-SDAG-NEXT: v_min_f16_e32 v1, 4.0, v1 +; VI-SDAG-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v1, 1.0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; VI-GISEL-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; VI-GISEL-NEXT: v_max_f32_e32 v2, 2.0, v1 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc +; VI-GISEL-NEXT: v_max_f32_e32 v2, 2.0, v0 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v0 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_min_f32_e32 v2, 4.0, v1 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc +; VI-GISEL-NEXT: v_min_f32_e32 v2, 4.0, v0 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0] +; 
GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v0, 2.0 op_sel_hi:[1,0] +; GFX9-GISEL-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX9-GISEL-NEXT: v_cmp_o_f16_e64 s[4:5], 2.0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-GISEL-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v0 +; GFX9-GISEL-NEXT: v_lshl_or_b32 v3, v1, 16, v3 +; GFX9-GISEL-NEXT: v_pk_min_f16 v3, v3, 4.0 op_sel_hi:[1,0] +; GFX9-GISEL-NEXT: v_cmp_o_f16_e64 s[4:5], 4.0, v0 +; GFX9-GISEL-NEXT: v_cmp_o_f16_e32 vcc, 4.0, v1 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v3, s[4:5] +; GFX9-GISEL-NEXT: v_cndmask_b32_sdwa v1, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; 
GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-GISEL-FAKE16-NEXT: v_pk_max_f16 v1, v0, 2.0 op_sel_hi:[1,0] +; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 2.0, v0 +; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 2.0, v2 +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v3, vcc_lo +; GFX11-GISEL-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 4.0, v0 +; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v2, v1, 16, v2 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-FAKE16-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo +; GFX11-GISEL-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 4.0, v1 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v3, vcc_lo +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_v2f16_maximum_minimum: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-GISEL-TRUE16-NEXT: v_pk_max_f16 v1, v0, 2.0 op_sel_hi:[1,0] +; GFX11-GISEL-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 2.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_cmp_o_f16_e64 s0, 2.0, v0.h +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v1.l, vcc_lo +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.h, s0 +; GFX11-GISEL-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 4.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_pk_min_f16 v1, v0, 4.0 op_sel_hi:[1,0] +; GFX11-GISEL-TRUE16-NEXT: v_cmp_o_f16_e64 s0, 4.0, v0.h +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v1.l, vcc_lo +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.h, s0 +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] + %a.add = fadd nnan <2 x half> %a, splat (half 1.0) + %max = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a.add, <2 x half> splat (half 2.0)) + %med = call <2 x half> @llvm.minimum.v2f16(<2 x half> %max, <2 x half> splat (half 4.0)) + ret <2 x half> %med +} + +define half @v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum(half %a) { +; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SI-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fc00000 +; SI-GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_max_f32_e32 v2, 2.0, v0 +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-GISEL-NEXT: v_min_f32_e32 v2, 4.0, v0 +; SI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f16_e32 v0, 1.0, v0 +; VI-SDAG-NEXT: v_max_f16_e32 v0, 2.0, v0 +; VI-SDAG-NEXT: v_min_f16_e32 v0, 4.0, v0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f16_e32 v0, 1.0, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fc00000 +; VI-GISEL-NEXT: v_max_f32_e32 v2, 2.0, v0 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 2.0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; VI-GISEL-NEXT: v_min_f32_e32 v2, 4.0, v0 +; VI-GISEL-NEXT: v_cmp_o_f32_e32 vcc, 4.0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_add_f16_e32 v0, 1.0, v0 +; GFX9-SDAG-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX9-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, 1.0, v0 +; GFX9-GISEL-NEXT: v_max_f16_e32 v1, 2.0, v0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX9-GISEL-NEXT: v_cmp_o_f16_e32 vcc, 2.0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-GISEL-NEXT: v_min_f16_e32 v1, 4.0, v0 +; GFX9-GISEL-NEXT: v_cmp_o_f16_e32 vcc, 4.0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; GFX11-SDAG-FAKE16: ; %bb.0: +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v1, 2.0, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 2.0, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v1, 4.0, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 4.0, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; GFX11-SDAG-TRUE16: ; %bb.0: +; GFX11-SDAG-TRUE16-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 +; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16_maximum_minimum: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_max_f16_e32 v0.h, 2.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 2.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.h, vcc_lo +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_min_f16_e32 v1.l, 4.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, 4.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] + %a.add = fadd nnan half %a, 1.0 + %max = call half @llvm.maximum.f16(half %a.add, half 2.0) + %med = call half @llvm.minimum.f16(half %max, half 4.0) + ret half %med +} + +define double @v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum(double %a) { +; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; SI-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 +; SI-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7ff80000 +; SI-GISEL-NEXT: v_max_f64 v[2:3], 
v[0:1], 2.0 +; SI-GISEL-NEXT: v_cmp_o_f64_e32 vcc, 2.0, v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; SI-GISEL-NEXT: v_min_f64 v[2:3], v[0:1], 4.0 +; SI-GISEL-NEXT: v_cmp_o_f64_e32 vcc, 4.0, v[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; VI-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 +; VI-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7ff80000 +; VI-GISEL-NEXT: v_max_f64 v[2:3], v[0:1], 2.0 +; VI-GISEL-NEXT: v_cmp_o_f64_e32 vcc, 2.0, v[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; VI-GISEL-NEXT: v_min_f64 v[2:3], v[0:1], 4.0 +; VI-GISEL-NEXT: v_cmp_o_f64_e32 vcc, 4.0, v[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 +; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; GFX9-GISEL-NEXT: 
v_mov_b32_e32 v4, 0x7ff80000 +; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[0:1], 2.0 +; GFX9-GISEL-NEXT: v_cmp_o_f64_e32 vcc, 2.0, v[0:1] +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_min_f64 v[2:3], v[0:1], 4.0 +; GFX9-GISEL-NEXT: v_cmp_o_f64_e32 vcc, 4.0, v[0:1] +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 +; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f64_maximum_minimum: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[0:1], 2.0 +; GFX11-GISEL-NEXT: v_cmp_o_f64_e32 vcc_lo, 2.0, v[0:1] +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[0:1], 4.0 +; GFX11-GISEL-NEXT: v_cmp_o_f64_e32 vcc_lo, 4.0, v[0:1] +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a.add = fadd nnan double %a, 1.0 + %max = call double 
@llvm.maximum.f64(double %a.add, double 2.0) + %med = call double @llvm.minimum.f64(double %max, double 4.0) + ret double %med +} + declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 declare float @llvm.minnum.f32(float, float) #0 diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll index b5f0b2ff9ef4c..61902b5fd4661 100644 --- a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll @@ -18,8 +18,8 @@ define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 { ; GFX9-LABEL: gep_noflags_alloca: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX9-NEXT: s_lshr_b32 s4, s32, 6 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -45,8 +45,8 @@ define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 { ; GFX9-LABEL: gep_inbounds_alloca: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX9-NEXT: s_lshr_b32 s4, s32, 6 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -72,8 +72,8 @@ define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 { ; GFX9-LABEL: gep_nuw_alloca: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX9-NEXT: s_lshr_b32 s4, s32, 6 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -99,8 +99,8 @@ define void 
@gep_nusw_alloca(i32 %idx, i32 %val) #0 { ; GFX9-LABEL: gep_nusw_alloca: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX9-NEXT: s_lshr_b32 s4, s32, 6 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -126,8 +126,8 @@ define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 { ; GFX9-LABEL: gep_inbounds_nuw_alloca: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX9-NEXT: s_lshr_b32 s4, s32, 6 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -153,8 +153,8 @@ define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 { ; GFX9-LABEL: gep_nusw_nuw_alloca: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 -; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX9-NEXT: s_lshr_b32 s4, s32, 6 +; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll index 8bd6c0f2652cf..d24b3a23cb9cd 100644 --- a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll +++ b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll @@ -22,7 +22,7 @@ define amdgpu_kernel void @dynamic_shared_array_0(ptr addrspace(1) %out) { } ; CHECK-LABEL: {{^}}dynamic_shared_array_1: -; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0xc00 +; CHECK: s_movk_i32 [[DYNLDS:s[0-9]+]], 0xc00 ; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]] define 
amdgpu_kernel void @dynamic_shared_array_1(ptr addrspace(1) %out, i32 %cond) { entry: @@ -49,7 +49,7 @@ endif: ; preds = %else, %if } ; CHECK-LABEL: {{^}}dynamic_shared_array_2: -; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x4000 +; CHECK: s_movk_i32 [[DYNLDS:s[0-9]+]], 0x4000 ; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]] define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -64,7 +64,7 @@ define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) { ; The offset to the dynamic shared memory array should be aligned on the type ; specified. ; CHECK-LABEL: {{^}}dynamic_shared_array_3: -; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44 +; CHECK: s_movk_i32 [[DYNLDS:s[0-9]+]], 0x44 ; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]] define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -80,7 +80,7 @@ define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) { ; The offset to the dynamic shared memory array should be aligned on the ; maximal one. ; CHECK-LABEL: {{^}}dynamic_shared_array_4: -; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48 +; CHECK: s_movk_i32 [[DYNLDS:s[0-9]+]], 0x48 ; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]] ; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]] define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) { @@ -99,7 +99,7 @@ define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) { ; Honor the explicit alignment from the specified variable. 
; CHECK-LABEL: {{^}}dynamic_shared_array_5: -; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44 +; CHECK: s_movk_i32 [[DYNLDS:s[0-9]+]], 0x44 ; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]] ; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]] define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) { @@ -118,7 +118,7 @@ define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) { ; Honor the explicit alignment from the specified variable. ; CHECK-LABEL: {{^}}dynamic_shared_array_6: -; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50 +; CHECK: s_movk_i32 [[DYNLDS:s[0-9]+]], 0x50 ; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]] ; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]] define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) { diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll index 3eef616ba267d..ad894ce36c55b 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll @@ -97,8 +97,7 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) { ; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s0, s0, 2 -; CHECK-NEXT: s_add_i32 s0, s0, 4 +; CHECK-NEXT: s_lshl2_add_u32 s0, s0, 4 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: ds_write_b32 v2, v1 @@ -136,10 +135,9 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) { ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] -; CHECK-NEXT: s_lshl_b32 s4, s17, 2 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_add_i32 s4, s4, 4 +; CHECK-NEXT: s_lshl2_add_u32 s4, s17, 4 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: ds_write_b16 v1, v0 @@ -163,8 +161,7 @@ 
define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) { ; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s0, s0, 2 -; CHECK-NEXT: s_add_i32 s0, s0, 4 +; CHECK-NEXT: s_lshl2_add_u32 s0, s0, 4 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: ds_write_b32 v2, v1 @@ -202,10 +199,9 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) { ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] -; CHECK-NEXT: s_lshl_b32 s4, s17, 2 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_add_i32 s4, s4, 8 +; CHECK-NEXT: s_lshl2_add_u32 s4, s17, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: ds_write_b16 v1, v0 @@ -229,8 +225,7 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) { ; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s0, s0, 2 -; CHECK-NEXT: s_add_i32 s0, s0, 8 +; CHECK-NEXT: s_lshl2_add_u32 s0, s0, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: ds_write_b32 v2, v1 @@ -268,10 +263,9 @@ define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) { ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] -; CHECK-NEXT: s_lshl_b32 s4, s17, 2 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_add_i32 s4, s4, 8 +; CHECK-NEXT: s_lshl2_add_u32 s4, s17, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: ds_write_b16 v1, v0 @@ -295,8 +289,7 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx) ; CHECK-NEXT: v_mov_b32_e32 v0, 2 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; 
CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s0, s0, 2 -; CHECK-NEXT: s_add_i32 s0, s0, 8 +; CHECK-NEXT: s_lshl2_add_u32 s0, s0, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 ; CHECK-NEXT: ds_write_b16 v1, v0 ; CHECK-NEXT: ds_write_b32 v2, v1 @@ -334,10 +327,9 @@ define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] -; CHECK-NEXT: s_lshl_b32 s4, s17, 2 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_add_i32 s4, s4, 8 +; CHECK-NEXT: s_lshl2_add_u32 s4, s17, 8 ; CHECK-NEXT: v_mov_b32_e32 v2, 2 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: ds_write_b16 v1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll index 3c55dcb486675..447cb62643384 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll @@ -6,8 +6,8 @@ ; ELF: Relocations [ ; ELF-NEXT: Section (3) .rel.text { -; ELF-NEXT: 0x{{[0-9a-f]*}} R_AMDGPU_ABS32_LO lds.external -; ELF-NEXT: 0x{{[0-9a-f]*}} R_AMDGPU_ABS32_LO lds.defined +; ELF-NEXT: 0x{{[0-9A-F]*}} R_AMDGPU_ABS32_LO lds.external +; ELF-NEXT: 0x{{[0-9A-F]*}} R_AMDGPU_ABS32_LO lds.defined ; ELF-NEXT: } ; ELF-NEXT: ] @@ -32,10 +32,10 @@ ; ELF-NEXT: } ; GCN-LABEL: {{^}}test_basic: -; GCN: v_mov_b32_e32 v1, lds.external@abs32@lo ; encoding: [0xff,0x02,0x02,0x7e,A,A,A,A] +; GCN: s_mov_b32 s0, lds.external@abs32@lo ; encoding: [0xff,0x00,0x80,0xbe,A,A,A,A] ; GCN-NEXT: ; fixup A - offset: 4, value: lds.external@abs32@lo, kind: FK_Data_4{{$}} ; -; GCN: s_add_i32 s0, s0, lds.defined@abs32@lo ; encoding: [0x00,0xff,0x00,0x81,A,A,A,A] +; GCN: s_lshl2_add_u32 s0, s2, lds.defined@abs32@lo ; encoding: [0x02,0xff,0x80,0x97,A,A,A,A] ; GCN-NEXT: ; fixup A - offset: 4, value: lds.defined@abs32@lo, kind: FK_Data_4{{$}} ; ; GCN: .globl lds.external diff --git 
a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll index 69439d49e588f..de82dcdecda48 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll @@ -102,10 +102,9 @@ define void @test_workgroup_id_x_non_kernel_optimized_fixed(ptr addrspace(1) %ou ; GFX1250-SDAG: ; %bb.0: ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1 -; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp6, 15 ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: s_lshl1_add_u32 s0, ttmp9, s0 ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 ; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll index 497241cff392d..6b6658bd672de 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -234,19 +234,18 @@ define amdgpu_cs void @workgroup_id_optimized() "amdgpu-cluster-dims"="2,3,4" { ; ; GFX1250-SDAG-LABEL: workgroup_id_optimized: ; GFX1250-SDAG: ; %bb.0: ; %.entry -; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1 -; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 -; GFX1250-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 14 -; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 -; GFX1250-SDAG-NEXT: s_and_b32 s0, s2, 0x3fffc +; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 14 ; GFX1250-SDAG-NEXT: s_and_b32 s2, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_and_b32 s1, s1, 0x3fffc ; GFX1250-SDAG-NEXT: s_bfe_u32 s3, ttmp6, 0x40008 ; GFX1250-SDAG-NEXT: s_mul_i32 s2, s2, 3 ; 
GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40004 -; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s0 +; GFX1250-SDAG-NEXT: s_lshl1_add_u32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s1 ; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s2 ; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s4 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s4 ; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s3 ; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null ; GFX1250-SDAG-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll index 52671f5d3deb4..c4e77c457db3a 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll @@ -1622,15 +1622,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 -; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2 -; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 -; GFX10_1-NEXT: s_add_i32 s55, s55, s4 +; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 +; GFX10_1-NEXT: s_addk_i32 s4, 0x4040 +; GFX10_1-NEXT: s_lshl2_add_u32 s55, s16, s4 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_1-NEXT: s_addk_i32 s55, 0x4040 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND @@ -1652,15 +1651,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 -; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2 -; GFX10_3-NEXT: s_lshr_b32 
s55, s32, 5 -; GFX10_3-NEXT: s_add_i32 s55, s55, s4 +; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 +; GFX10_3-NEXT: s_addk_i32 s4, 0x4040 +; GFX10_3-NEXT: s_lshl2_add_u32 s55, s16, s4 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_3-NEXT: s_addk_i32 s55, 0x4040 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND @@ -1681,13 +1679,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: s_add_i32 s1, s32, 64 -; GFX11-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s1 -; GFX11-NEXT: s_add_i32 s55, s32, s0 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_addk_i32 s55, 0x4040 +; GFX11-NEXT: s_lshl2_add_u32 s55, s0, s1 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc @@ -1712,16 +1710,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s1 ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 -; GFX12-NEXT: s_lshl_b32 s0, s0, 2 +; GFX12-NEXT: s_add_co_i32 s1, s32, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_add_co_i32 s55, s32, s0 +; GFX12-NEXT: s_lshl2_add_u32 s55, s0, s1 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_addk_co_i32 s55, 0x4000 -; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND @@ -1770,10 +1766,9 @@ define void 
@scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 -; GFX900-NEXT: s_lshl_b32 s4, s16, 2 -; GFX900-NEXT: s_lshr_b32 s55, s32, 6 -; GFX900-NEXT: s_add_i32 s55, s55, s4 -; GFX900-NEXT: s_addk_i32 s55, 0x4040 +; GFX900-NEXT: s_lshr_b32 s4, s32, 6 +; GFX900-NEXT: s_addk_i32 s4, 0x4040 +; GFX900-NEXT: s_lshl2_add_u32 s55, s16, s4 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -1799,9 +1794,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 -; GFX942-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-NEXT: s_add_i32 s55, s32, s0 -; GFX942-NEXT: s_addk_i32 s55, 0x4040 +; GFX942-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX942-NEXT: s_lshl2_add_u32 s55, s0, s1 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll index c6f7ce51f5ea2..9888204b997a9 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll @@ -260,12 +260,11 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX10-WGP-LABEL: local_nontemporal_load_1: ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_mov_b32 s6, 2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; 
GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-WGP-NEXT: ds_read_b32 v1, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] @@ -274,12 +273,11 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX10-CU-LABEL: local_nontemporal_load_1: ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_mov_b32 s6, 2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-CU-NEXT: ds_read_b32 v1, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -311,15 +309,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX90A-NOTTGSPLIT-LABEL: local_nontemporal_load_1: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s6, 0x3ff -; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s6 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s6, 2 +; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s7, 0x3ff +; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s7 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 -; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s6, v2 +; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] @@ -328,15 +324,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX90A-TGSPLIT-LABEL: local_nontemporal_load_1: ; 
GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s6, 0x3ff -; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s6 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s6, 2 +; GFX90A-TGSPLIT-NEXT: s_mov_b32 s7, 0x3ff +; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s7 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 -; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s6, v2 +; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX90A-TGSPLIT-NEXT: ds_read_b32 v1, v1 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] @@ -345,15 +339,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX942-NOTTGSPLIT-LABEL: local_nontemporal_load_1: ; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 ; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s3, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] @@ -362,15 +354,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX942-TGSPLIT-LABEL: 
local_nontemporal_load_1: ; GFX942-TGSPLIT: ; %bb.0: ; %entry ; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 ; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s3, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v1 ; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] @@ -379,14 +369,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX11-WGP-LABEL: local_nontemporal_load_1: ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-WGP-NEXT: s_mov_b32 s2, 2 +; GFX11-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-WGP-NEXT: ds_load_b32 v1, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -395,14 +384,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX11-CU-LABEL: local_nontemporal_load_1: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; 
GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-CU-NEXT: s_mov_b32 s2, 2 +; GFX11-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-CU-NEXT: ds_load_b32 v1, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -411,15 +399,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX12-WGP-LABEL: local_nontemporal_load_1: ; GFX12-WGP: ; %bb.0: ; %entry ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX12-WGP-NEXT: s_mov_b32 s2, 2 +; GFX12-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 -; GFX12-WGP-NEXT: s_wait_alu 0xfffe -; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX12-WGP-NEXT: ds_load_b32 v1, v1 ; GFX12-WGP-NEXT: s_wait_dscnt 0x0 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -428,15 +414,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX12-CU-LABEL: local_nontemporal_load_1: ; GFX12-CU: ; %bb.0: ; %entry ; GFX12-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX12-CU-NEXT: s_mov_b32 s2, 2 +; GFX12-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 -; 
GFX12-CU-NEXT: s_wait_alu 0xfffe -; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX12-CU-NEXT: ds_load_b32 v1, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -445,14 +429,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX1250-LABEL: local_nontemporal_load_1: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: v_mov_b32_e32 v1, v0 -; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 -; GFX1250-NEXT: s_mov_b32 s2, 0x3ff -; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX1250-NEXT: s_mov_b32 s2, 2 +; GFX1250-NEXT: s_mov_b32 s3, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX1250-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX1250-NEXT: ds_load_b32 v1, v1 ; GFX1250-NEXT: s_wait_dscnt 0x0 ; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] @@ -679,12 +662,11 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX10-WGP-LABEL: local_nontemporal_store_1: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-WGP-NEXT: s_mov_b32 s5, 2 -; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, s5, s6 +; GFX10-WGP-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 @@ -692,12 +674,11 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX10-CU-LABEL: local_nontemporal_store_1: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; 
GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-CU-NEXT: s_mov_b32 s5, 2 -; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, s5, s6 +; GFX10-CU-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -720,15 +701,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX90A-NOTTGSPLIT-LABEL: local_nontemporal_store_1: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s5, 0x3ff -; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s5 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s5, 2 -; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 -; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s5, v1 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s6, 0x3ff +; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s6 +; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -736,15 +715,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX90A-TGSPLIT-LABEL: local_nontemporal_store_1: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX90A-TGSPLIT-NEXT: s_load_dword 
s5, s[8:9], 0x8 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s5, 0x3ff -; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s5 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s5, 2 -; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 -; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s5, v1 +; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX90A-TGSPLIT-NEXT: s_mov_b32 s6, 0x3ff +; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s6 +; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -752,15 +729,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX942-NOTTGSPLIT-LABEL: local_nontemporal_store_1: ; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s1, v1 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 ; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -768,15 +743,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX942-TGSPLIT-LABEL: local_nontemporal_store_1: ; GFX942-TGSPLIT: ; %bb.0: ; %entry -; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 -; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 ; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s1, v1 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 ; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -784,14 +757,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX11-WGP-LABEL: local_nontemporal_store_1: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-WGP-NEXT: s_mov_b32 s1, 2 -; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX11-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 @@ -799,14 +771,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX11-CU-LABEL: local_nontemporal_store_1: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-CU-NEXT: 
s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-CU-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-CU-NEXT: s_mov_b32 s1, 2 -; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX11-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -814,15 +785,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX12-WGP-LABEL: local_nontemporal_store_1: ; GFX12-WGP: ; %bb.0: ; %entry -; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 -; GFX12-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX12-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX12-WGP-NEXT: s_mov_b32 s1, 2 -; GFX12-WGP-NEXT: s_wait_alu 0xfffe -; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX12-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-WGP-NEXT: ds_store_b32 v0, v1 @@ -830,15 +799,13 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX12-CU-LABEL: local_nontemporal_store_1: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 -; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX12-CU-NEXT: s_mov_b32 s1, 0x3ff -; 
GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX12-CU-NEXT: s_mov_b32 s1, 2 -; GFX12-CU-NEXT: s_wait_alu 0xfffe -; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX12-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 @@ -846,15 +813,14 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; GFX1250-LABEL: local_nontemporal_store_1: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 +; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_mov_b32 s1, 0x3ff -; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX1250-NEXT: s_mov_b32 s1, 2 -; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX1250-NEXT: s_mov_b32 s2, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s0 ; GFX1250-NEXT: ds_store_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll index d686e7a2d5b4c..33c516c61e42c 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll @@ -208,12 +208,11 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX10-WGP-LABEL: local_volatile_load_1: ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 
-; GFX10-WGP-NEXT: s_mov_b32 s6, 2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-WGP-NEXT: ds_read_b32 v1, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] @@ -222,12 +221,11 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX10-CU-LABEL: local_volatile_load_1: ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_mov_b32 s6, 2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-CU-NEXT: ds_read_b32 v1, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -259,14 +257,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX11-WGP-LABEL: local_volatile_load_1: ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-WGP-NEXT: s_mov_b32 s2, 2 +; GFX11-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-WGP-NEXT: ds_load_b32 v1, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -275,14 +272,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX11-CU-LABEL: local_volatile_load_1: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-CU-NEXT: s_load_b32 s3, 
s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-CU-NEXT: s_mov_b32 s2, 2 +; GFX11-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-CU-NEXT: ds_load_b32 v1, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -291,15 +287,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX12-WGP-LABEL: local_volatile_load_1: ; GFX12-WGP: ; %bb.0: ; %entry ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX12-WGP-NEXT: s_mov_b32 s2, 2 +; GFX12-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 -; GFX12-WGP-NEXT: s_wait_alu 0xfffe -; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX12-WGP-NEXT: ds_load_b32 v1, v1 ; GFX12-WGP-NEXT: s_wait_dscnt 0x0 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -308,15 +302,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX12-CU-LABEL: local_volatile_load_1: ; GFX12-CU: ; %bb.0: ; %entry ; GFX12-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX12-CU-NEXT: s_mov_b32 s2, 2 +; GFX12-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, 
s3 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 -; GFX12-CU-NEXT: s_wait_alu 0xfffe -; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX12-CU-NEXT: ds_load_b32 v1, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -325,14 +317,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX1250-LABEL: local_volatile_load_1: ; GFX1250: ; %bb.0: ; %entry ; GFX1250-NEXT: v_mov_b32_e32 v1, v0 -; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 -; GFX1250-NEXT: s_mov_b32 s2, 0x3ff -; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX1250-NEXT: s_mov_b32 s2, 2 +; GFX1250-NEXT: s_mov_b32 s3, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX1250-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX1250-NEXT: ds_load_b32 v1, v1 ; GFX1250-NEXT: s_wait_dscnt 0x0 ; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] @@ -511,12 +502,11 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX10-WGP-LABEL: local_volatile_store_1: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-WGP-NEXT: s_mov_b32 s5, 2 -; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, s5, s6 +; GFX10-WGP-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 @@ -524,12 +514,11 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX10-CU-LABEL: local_volatile_store_1: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], 
s[8:9], 0x0 -; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-CU-NEXT: s_mov_b32 s5, 2 -; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, s5, s6 +; GFX10-CU-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -552,14 +541,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX11-WGP-LABEL: local_volatile_store_1: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-WGP-NEXT: s_mov_b32 s1, 2 -; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX11-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 @@ -567,14 +555,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX11-CU-LABEL: local_volatile_store_1: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-CU-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-CU-NEXT: s_mov_b32 s1, 2 -; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2 
+; GFX11-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -582,15 +569,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX12-WGP-LABEL: local_volatile_store_1: ; GFX12-WGP: ; %bb.0: ; %entry -; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 -; GFX12-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX12-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX12-WGP-NEXT: s_mov_b32 s1, 2 -; GFX12-WGP-NEXT: s_wait_alu 0xfffe -; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX12-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-WGP-NEXT: ds_store_b32 v0, v1 @@ -598,15 +583,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX12-CU-LABEL: local_volatile_store_1: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 -; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX12-CU-NEXT: s_mov_b32 s1, 0x3ff -; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX12-CU-NEXT: s_mov_b32 s1, 2 -; GFX12-CU-NEXT: s_wait_alu 0xfffe -; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX12-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; 
GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 @@ -614,15 +597,14 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX1250-LABEL: local_volatile_store_1: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 +; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_mov_b32 s1, 0x3ff -; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX1250-NEXT: s_mov_b32 s1, 2 -; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX1250-NEXT: s_mov_b32 s2, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s0 ; GFX1250-NEXT: ds_store_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll index 89de17ecbd1e8..6c19722ad6e33 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -270,12 +270,11 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX10-WGP-NEXT: s_add_u32 s0, s0, s17 ; GFX10-WGP-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_mov_b32 s6, 2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-WGP-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen slc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: global_store_dword 
v0, v1, s[4:5] @@ -286,12 +285,11 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX10-CU-NEXT: s_add_u32 s0, s0, s17 ; GFX10-CU-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_mov_b32 s6, 2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-CU-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen slc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -330,15 +328,13 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX90A-NOTTGSPLIT-NEXT: s_add_u32 s0, s0, s17 ; GFX90A-NOTTGSPLIT-NEXT: s_addc_u32 s1, s1, 0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s6, 0x3ff -; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s6 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s6, 2 +; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s7, 0x3ff +; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s7 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 -; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s6, v2 +; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX90A-NOTTGSPLIT-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen glc slc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] @@ -349,15 +345,13 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX90A-TGSPLIT-NEXT: s_add_u32 s0, s0, s17 ; GFX90A-TGSPLIT-NEXT: s_addc_u32 s1, s1, 0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; 
GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s6, 0x3ff -; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s6 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s6, 2 +; GFX90A-TGSPLIT-NEXT: s_mov_b32 s7, 0x3ff +; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s7 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 -; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s6, v2 +; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX90A-TGSPLIT-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen glc slc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] @@ -366,15 +360,13 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX942-NOTTGSPLIT-LABEL: private_nontemporal_load_1: ; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 ; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s3, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX942-NOTTGSPLIT-NEXT: scratch_load_dword v1, v1, off nt ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] @@ -383,15 +375,13 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX942-TGSPLIT-LABEL: private_nontemporal_load_1: ; GFX942-TGSPLIT: ; 
%bb.0: ; %entry ; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 ; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s3, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX942-TGSPLIT-NEXT: scratch_load_dword v1, v1, off nt ; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] @@ -400,14 +390,13 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX11-WGP-LABEL: private_nontemporal_load_1: ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-WGP-NEXT: s_mov_b32 s2, 2 +; GFX11-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-WGP-NEXT: scratch_load_b32 v1, v1, off slc dlc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -416,14 +405,13 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX11-CU-LABEL: private_nontemporal_load_1: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-CU-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-CU-NEXT: s_mov_b32 s2, 2 +; GFX11-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-CU-NEXT: scratch_load_b32 v1, v1, off slc dlc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -708,12 +696,11 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: s_add_u32 s0, s0, s17 ; GFX10-WGP-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-WGP-NEXT: s_mov_b32 s5, 2 -; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v0, s5, s6 +; GFX10-WGP-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v0, 2, s5 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-WGP-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen glc slc @@ -723,12 +710,11 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: s_add_u32 s0, s0, s17 ; GFX10-CU-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-CU-NEXT: s_mov_b32 s5, 2 -; GFX10-CU-NEXT: v_lshl_add_u32 v1, v0, s5, s6 +; GFX10-CU-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-CU-NEXT: v_lshl_add_u32 v1, v0, 2, s5 ; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen glc slc @@ -758,15 +744,13 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: s_add_u32 s0, s0, s17 ; GFX90A-NOTTGSPLIT-NEXT: s_addc_u32 s1, s1, 0 -; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s5, 0x3ff -; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s5 -; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s5, 2 -; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 -; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s5, v1 +; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX90A-NOTTGSPLIT-NEXT: s_mov_b32 s6, 0x3ff +; GFX90A-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s6 +; GFX90A-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v0, 2, s5 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s4 ; GFX90A-NOTTGSPLIT-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen glc slc @@ -776,15 +760,13 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_add_u32 s0, s0, s17 ; GFX90A-TGSPLIT-NEXT: s_addc_u32 s1, s1, 0 -; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX90A-TGSPLIT-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s5, 0x3ff -; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s5 -; GFX90A-TGSPLIT-NEXT: s_mov_b32 s5, 2 -; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 -; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s5, v1 +; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX90A-TGSPLIT-NEXT: s_mov_b32 s6, 0x3ff +; GFX90A-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s6 +; GFX90A-TGSPLIT-NEXT: v_lshl_add_u32 v1, v0, 2, s5 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s4 ; GFX90A-TGSPLIT-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen glc slc @@ -792,15 +774,13 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; ; GFX942-NOTTGSPLIT-LABEL: private_nontemporal_store_1: ; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s1, v1 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v0, 2, s1 ; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NOTTGSPLIT-NEXT: scratch_store_dword v1, v0, off nt @@ -808,15 +788,13 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; ; GFX942-TGSPLIT-LABEL: private_nontemporal_store_1: ; GFX942-TGSPLIT: ; %bb.0: ; %entry -; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 ; GFX942-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s1, v1 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v0, 2, s1 ; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-TGSPLIT-NEXT: scratch_store_dword v1, v0, off nt @@ -824,14 +802,13 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; ; GFX11-WGP-LABEL: private_nontemporal_store_1: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-WGP-NEXT: s_mov_b32 s1, 2 -; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v0, s1, s2 +; GFX11-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v0, 2, s1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-WGP-NEXT: scratch_store_b32 v1, v0, off glc slc dlc @@ -839,14 +816,13 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; ; GFX11-CU-LABEL: private_nontemporal_store_1: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; 
GFX11-CU-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-CU-NEXT: s_mov_b32 s1, 2 -; GFX11-CU-NEXT: v_lshl_add_u32 v1, v0, s1, s2 +; GFX11-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-CU-NEXT: v_lshl_add_u32 v1, v0, 2, s1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: scratch_store_b32 v1, v0, off glc slc dlc diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll index 7faa0621aa6d0..7c23b76cec3e9 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll @@ -228,12 +228,11 @@ define amdgpu_kernel void @private_volatile_load_1( ; GFX10-WGP-NEXT: s_add_u32 s0, s0, s17 ; GFX10-WGP-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_mov_b32 s6, 2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-WGP-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] @@ -244,12 +243,11 @@ define amdgpu_kernel void @private_volatile_load_1( ; GFX10-CU-NEXT: s_add_u32 s0, s0, s17 ; GFX10-CU-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_mov_b32 s6, 2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; 
GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-CU-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -286,14 +284,13 @@ define amdgpu_kernel void @private_volatile_load_1( ; GFX11-WGP-LABEL: private_volatile_load_1: ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-WGP-NEXT: s_mov_b32 s2, 2 +; GFX11-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-WGP-NEXT: scratch_load_b32 v1, v1, off glc dlc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -302,14 +299,13 @@ define amdgpu_kernel void @private_volatile_load_1( ; GFX11-CU-LABEL: private_volatile_load_1: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-CU-NEXT: s_mov_b32 s2, 2 +; GFX11-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-CU-NEXT: scratch_load_b32 v1, v1, off glc dlc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -578,12 +574,11 @@ define amdgpu_kernel void @private_volatile_store_1( ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: 
s_add_u32 s0, s0, s17 ; GFX10-WGP-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-WGP-NEXT: s_mov_b32 s5, 2 -; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v0, s5, s6 +; GFX10-WGP-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v0, 2, s5 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-WGP-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen @@ -594,12 +589,11 @@ define amdgpu_kernel void @private_volatile_store_1( ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: s_add_u32 s0, s0, s17 ; GFX10-CU-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-CU-NEXT: s_mov_b32 s5, 2 -; GFX10-CU-NEXT: v_lshl_add_u32 v1, v0, s5, s6 +; GFX10-CU-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-CU-NEXT: v_lshl_add_u32 v1, v0, 2, s5 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-CU-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen @@ -629,14 +623,13 @@ define amdgpu_kernel void @private_volatile_store_1( ; ; GFX11-WGP-LABEL: private_volatile_store_1: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-WGP-NEXT: s_mov_b32 s1, 2 
-; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v0, s1, s2 +; GFX11-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v0, 2, s1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-WGP-NEXT: scratch_store_b32 v1, v0, off dlc @@ -645,14 +638,13 @@ define amdgpu_kernel void @private_volatile_store_1( ; ; GFX11-CU-LABEL: private_volatile_store_1: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-CU-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-CU-NEXT: s_mov_b32 s1, 2 -; GFX11-CU-NEXT: v_lshl_add_u32 v1, v0, s1, s2 +; GFX11-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-CU-NEXT: v_lshl_add_u32 v1, v0, 2, s1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-CU-NEXT: scratch_store_b32 v1, v0, off dlc diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-lshl_add.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-lshl_add.ll new file mode 100644 index 0000000000000..b7e6ed26876c4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-lshl_add.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s + +define amdgpu_kernel void @lshl1_add(ptr addrspace(1) %in, ptr addrspace(7) %in2) { +; CHECK-LABEL: lshl1_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 +; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_load_b32 s5, s[4:5], 0x54 +; 
CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: global_load_b32 v1, v0, s[6:7] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt 0x0 +; CHECK-NEXT: s_mov_b32 s6, s3 +; CHECK-NEXT: s_mov_b32 s7, s4 +; CHECK-NEXT: s_mov_b32 s3, s4 +; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], s[4:5] +; CHECK-NEXT: s_mov_b32 s5, s2 +; CHECK-NEXT: s_mov_b32 s2, s1 +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5] +; CHECK-NEXT: v_lshl_add_u32 v1, v1, 1, s0 +; CHECK-NEXT: buffer_store_b16 v0, v1, s[4:7], null offen +; CHECK-NEXT: s_endpgm + %vaddr = load volatile i32, ptr addrspace(1) %in, align 4 + %1 = sext i32 %vaddr to i64 + %gep = getelementptr i16, ptr addrspace(7) %in2, i64 %1 + store i16 0, ptr addrspace(7) %gep, align 2 + ret void +} + +define amdgpu_kernel void @lshl2_add(ptr addrspace(1) %in, ptr addrspace(7) %in2) { +; CHECK-LABEL: lshl2_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 +; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_load_b32 s5, s[4:5], 0x54 +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: global_load_b32 v1, v0, s[6:7] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt 0x0 +; CHECK-NEXT: s_mov_b32 s6, s3 +; CHECK-NEXT: s_mov_b32 s7, s4 +; CHECK-NEXT: s_mov_b32 s3, s4 +; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], s[4:5] +; CHECK-NEXT: s_mov_b32 s5, s2 +; CHECK-NEXT: s_mov_b32 s2, s1 +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5] +; CHECK-NEXT: v_lshl_add_u32 v1, v1, 2, s0 +; CHECK-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen +; CHECK-NEXT: s_endpgm + %vaddr = load volatile i32, ptr addrspace(1) %in, align 4 + %1 = sext i32 %vaddr to i64 + %gep = getelementptr i32, ptr addrspace(7) %in2, i64 %1 + store i32 0, ptr addrspace(7) %gep, align 4 + ret void +} + +define amdgpu_kernel void @lshl3_add(ptr addrspace(1) %in, ptr 
addrspace(7) %in2) { +; CHECK-LABEL: lshl3_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 +; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_load_b32 s5, s[4:5], 0x54 +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: global_load_b32 v2, v0, s[6:7] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt 0x0 +; CHECK-NEXT: s_mov_b32 s6, s3 +; CHECK-NEXT: s_mov_b32 s7, s4 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], s[4:5] +; CHECK-NEXT: s_mov_b32 s5, s2 +; CHECK-NEXT: s_mov_b32 s2, s1 +; CHECK-NEXT: s_mov_b32 s3, s4 +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5] +; CHECK-NEXT: v_lshl_add_u32 v2, v2, 3, s0 +; CHECK-NEXT: buffer_store_b64 v[0:1], v2, s[4:7], null offen +; CHECK-NEXT: s_endpgm + %vaddr = load volatile i32, ptr addrspace(1) %in, align 4 + %1 = sext i32 %vaddr to i64 + %gep = getelementptr i64, ptr addrspace(7) %in2, i64 %1 + store i64 0, ptr addrspace(7) %gep, align 8 + ret void +} + +define amdgpu_kernel void @lshl4_add(ptr addrspace(1) %in, ptr addrspace(7) %in2) { +; CHECK-LABEL: lshl4_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 +; CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_load_b32 s5, s[4:5], 0x54 +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: s_mov_b32 s9, s4 +; CHECK-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: global_load_b32 v3, v0, s[6:7] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt 0x0 +; CHECK-NEXT: s_mov_b32 s7, s4 +; CHECK-NEXT: s_mov_b32 s6, s3 +; CHECK-NEXT: s_mov_b32 s8, s1 +; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], s[4:5] +; CHECK-NEXT: s_mov_b32 s5, s2 +; 
CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] +; CHECK-NEXT: v_lshl_add_u32 v4, v3, 4, s0 +; CHECK-NEXT: v_mov_b32_e32 v3, v0 +; CHECK-NEXT: buffer_store_b128 v[0:3], v4, s[4:7], null offen +; CHECK-NEXT: s_endpgm + %vaddr = load volatile i32, ptr addrspace(1) %in, align 4 + %1 = sext i32 %vaddr to i64 + %gep = getelementptr i128, ptr addrspace(7) %in2, i64 %1 + store i128 0, ptr addrspace(7) %gep, align 16 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll index 6e2d0f6503a20..7e2bfa666a19f 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll @@ -144,7 +144,7 @@ define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 { ; SICIVI: buffer_store_dword v{{[0-9]+}}, [[ADDR1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} ; GFX9: global_load_dword [[VADDR:v[0-9]+]], -; GFX9: v_lshlrev_b32_e32 [[ADDR:v[0-9]+]], 2, [[VADDR]] +; GFX9: v_lshl_add_u32 [[ADDR:v[0-9]+]], [[VADDR]], 2, s{{[0-9]+}} ; GFX9-NOT [[ADDR]] ; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen offset:32 define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll index bac460949d579..a2c86bd09404a 100644 --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -33,11 +33,10 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; MUBUF-NEXT: s_mov_b32 s6, s32 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0 ; MUBUF-NEXT: v_mov_b32_e32 v2, 1 -; MUBUF-NEXT: s_lshl_b32 s7, s10, 2 ; MUBUF-NEXT: s_add_i32 s32, s6, 0x1000 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], s6 offset:4 -; MUBUF-NEXT: s_add_i32 s6, s6, s7 +; MUBUF-NEXT: s_lshl2_add_u32 s6, s10, s6 ; 
MUBUF-NEXT: v_mov_b32_e32 v2, s6 ; MUBUF-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen ; MUBUF-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 @@ -68,10 +67,9 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; FLATSCR-NEXT: s_mov_b32 s2, s32 ; FLATSCR-NEXT: v_mov_b32_e32 v1, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 1 -; FLATSCR-NEXT: s_lshl_b32 s3, s6, 2 ; FLATSCR-NEXT: s_add_i32 s32, s2, 0x1000 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s2 -; FLATSCR-NEXT: s_add_i32 s2, s2, s3 +; FLATSCR-NEXT: s_lshl2_add_u32 s2, s6, s2 ; FLATSCR-NEXT: scratch_load_dword v2, off, s2 ; FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -132,12 +130,11 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; MUBUF-NEXT: ; %bb.1: ; %bb.0 ; MUBUF-NEXT: s_add_i32 s4, s32, 0xfff ; MUBUF-NEXT: s_and_b32 s4, s4, 0xfffff000 -; MUBUF-NEXT: s_lshl_b32 s5, s5, 2 ; MUBUF-NEXT: s_add_i32 s32, s4, 0x1000 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0 ; MUBUF-NEXT: v_mov_b32_e32 v2, s4 ; MUBUF-NEXT: v_mov_b32_e32 v3, 1 -; MUBUF-NEXT: s_add_i32 s4, s4, s5 +; MUBUF-NEXT: s_lshl2_add_u32 s4, s5, s4 ; MUBUF-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen ; MUBUF-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4 ; MUBUF-NEXT: v_mov_b32_e32 v2, s4 @@ -168,10 +165,9 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; FLATSCR-NEXT: v_mov_b32_e32 v1, 0 ; FLATSCR-NEXT: s_and_b32 s0, s0, 0xfffff000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 1 -; FLATSCR-NEXT: s_lshl_b32 s1, s1, 2 ; FLATSCR-NEXT: s_add_i32 s32, s0, 0x1000 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s0 -; FLATSCR-NEXT: s_add_i32 s0, s0, s1 +; FLATSCR-NEXT: s_lshl2_add_u32 s0, s1, s0 ; FLATSCR-NEXT: scratch_load_dword v2, off, s0 ; FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/shlN_add.ll index 
3e507a0c5889f..ba8ae9554d0e8 100644 --- a/llvm/test/CodeGen/AMDGPU/shlN_add.ll +++ b/llvm/test/CodeGen/AMDGPU/shlN_add.ll @@ -14,8 +14,7 @@ define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { ; GFX9-SDAG-LABEL: s_shl1_add_u32: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: s_lshl1_add_u32 s0, s0, s1 ; GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl1_add_u32: @@ -26,8 +25,7 @@ define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { ; ; GFX10-SDAG-LABEL: s_shl1_add_u32: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: s_lshl1_add_u32 s0, s0, s1 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl1_add_u32: @@ -53,8 +51,7 @@ define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { ; GFX9-SDAG-LABEL: s_shl2_add_u32: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s0, s0, s1 ; GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl2_add_u32: @@ -65,8 +62,7 @@ define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { ; ; GFX10-SDAG-LABEL: s_shl2_add_u32: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: s_lshl2_add_u32 s0, s0, s1 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl2_add_u32: @@ -92,8 +88,7 @@ define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { ; GFX9-SDAG-LABEL: s_shl3_add_u32: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: s_lshl3_add_u32 s0, s0, s1 ; 
GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl3_add_u32: @@ -104,8 +99,7 @@ define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { ; ; GFX10-SDAG-LABEL: s_shl3_add_u32: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: s_lshl3_add_u32 s0, s0, s1 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl3_add_u32: @@ -131,8 +125,7 @@ define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) { ; GFX9-SDAG-LABEL: s_shl4_add_u32: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: s_lshl4_add_u32 s0, s0, s1 ; GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl4_add_u32: @@ -143,8 +136,7 @@ define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) { ; ; GFX10-SDAG-LABEL: s_shl4_add_u32: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: s_lshl4_add_u32 s0, s0, s1 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl4_add_u32: @@ -598,10 +590,8 @@ define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GFX9-SDAG-LABEL: s_shl1_add_u32_v2: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 1 -; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: s_lshl1_add_u32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_lshl1_add_u32 s0, s0, s2 ; GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl1_add_u32_v2: @@ -614,10 +604,8 @@ define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i ; ; GFX10-SDAG-LABEL: s_shl1_add_u32_v2: ; GFX10-SDAG: ; 
%bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 1 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 -; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: s_lshl1_add_u32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_lshl1_add_u32 s1, s1, s3 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl1_add_u32_v2: @@ -647,10 +635,8 @@ define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GFX9-SDAG-LABEL: s_shl2_add_u32_v2: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 2 -; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s0, s0, s2 ; GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl2_add_u32_v2: @@ -663,10 +649,8 @@ define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i ; ; GFX10-SDAG-LABEL: s_shl2_add_u32_v2: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 -; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: s_lshl2_add_u32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_lshl2_add_u32 s1, s1, s3 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl2_add_u32_v2: @@ -696,10 +680,8 @@ define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GFX9-SDAG-LABEL: s_shl3_add_u32_v2: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3 -; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 3 -; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: s_lshl3_add_u32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_lshl3_add_u32 s0, s0, s2 ; GFX9-SDAG-NEXT: ; return to 
shader part epilog ; ; GFX8-SDAG-LABEL: s_shl3_add_u32_v2: @@ -712,10 +694,8 @@ define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i ; ; GFX10-SDAG-LABEL: s_shl3_add_u32_v2: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3 -; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 3 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 -; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: s_lshl3_add_u32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_lshl3_add_u32 s1, s1, s3 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl3_add_u32_v2: @@ -745,10 +725,8 @@ define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GFX9-SDAG-LABEL: s_shl4_add_u32_v2: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4 -; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 4 -; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: s_lshl4_add_u32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_lshl4_add_u32 s0, s0, s2 ; GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl4_add_u32_v2: @@ -761,10 +739,8 @@ define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i ; ; GFX10-SDAG-LABEL: s_shl4_add_u32_v2: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4 -; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 -; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: s_lshl4_add_u32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_lshl4_add_u32 s1, s1, s3 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl4_add_u32_v2: @@ -794,10 +770,8 @@ define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GFX9-SDAG-LABEL: s_shl_2_4_add_u32_v2: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-SDAG-NEXT: 
s_lshl_b32 s1, s1, 4 -; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 -; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: s_lshl4_add_u32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_lshl2_add_u32 s0, s0, s2 ; GFX9-SDAG-NEXT: ; return to shader part epilog ; ; GFX8-SDAG-LABEL: s_shl_2_4_add_u32_v2: @@ -810,10 +784,8 @@ define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32 ; ; GFX10-SDAG-LABEL: s_shl_2_4_add_u32_v2: ; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4 -; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 -; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: s_lshl2_add_u32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_lshl4_add_u32 s1, s1, s3 ; GFX10-SDAG-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_shl_2_4_add_u32_v2: diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index 5aafb0f576fb4..90304b2c730cb 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -69,6 +69,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4) ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: 
[[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc @@ -92,7 +93,6 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4) @@ -101,6 +101,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc @@ -113,10 +114,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %384:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LSHL4_ADD_U32_:%[0-9]+]]:sreg_32 = S_LSHL4_ADD_U32 [[COPY12]], 16, implicit-def dead $scc + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, 
[[S_LSHL4_ADD_U32_]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4) @@ -127,25 +126,25 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, 
implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = 
S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc + ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_3]], 31, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_3]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc + ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_4]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4) @@ -164,11 +163,11 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; 
CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc + ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) @@ -185,11 +184,11 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc + ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], 
implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] @@ -198,32 +197,32 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %470:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) ; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1 - ; CHECK-NEXT: KILL undef %470:sreg_64 + ; CHECK-NEXT: KILL undef %469:sreg_64 ; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3 - ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4) - ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, 
implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc + ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY18]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc - ; CHECK-NEXT: 
[[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -475, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -491, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -475, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -491, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4) @@ -236,10 +235,10 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 
0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] + ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] - ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec @@ -310,15 +309,15 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -216, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_36:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_35]], [[V_ADD_U32_e64_17]], implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_37:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_36]], [[V_ADD_U32_e64_18]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_7]], [[V_OR_B32_e64_37]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_8]], [[V_OR_B32_e64_38]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_9]], [[V_OR_B32_e64_39]], implicit $exec - ; CHECK-NEXT: 
[[V_OR_B32_e64_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_10]], [[V_OR_B32_e64_40]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_11]], [[V_OR_B32_e64_41]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_12]], [[V_OR_B32_e64_42]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_13]], [[V_OR_B32_e64_43]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_6]], [[V_OR_B32_e64_37]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_7]], [[V_OR_B32_e64_38]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_8]], [[V_OR_B32_e64_39]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_9]], [[V_OR_B32_e64_40]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_10]], [[V_OR_B32_e64_41]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_11]], [[V_OR_B32_e64_42]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_12]], [[V_OR_B32_e64_43]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -457, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_14]], [[V_OR_B32_e64_44]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_13]], [[V_OR_B32_e64_44]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -458, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_46:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_45]], [[V_ADD_U32_e64_19]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -459, 
[[BUFFER_LOAD_FORMAT_X_IDXEN21]], 0, implicit $exec @@ -326,15 +325,15 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -466, [[BUFFER_LOAD_FORMAT_X_IDXEN22]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_48:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_47]], [[V_ADD_U32_e64_21]], implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_49:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_48]], [[V_ADD_U32_e64_22]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_15]], [[V_OR_B32_e64_49]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_16]], [[V_OR_B32_e64_50]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_17]], [[V_OR_B32_e64_51]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_23]], [[V_OR_B32_e64_52]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_18]], [[V_OR_B32_e64_53]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_19]], [[V_OR_B32_e64_54]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_20]], [[V_OR_B32_e64_55]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_21]], [[V_OR_B32_e64_56]], implicit $exec - ; CHECK-NEXT: [[V_OR_B32_e64_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_22]], [[V_OR_B32_e64_57]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_14]], [[V_OR_B32_e64_49]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_15]], [[V_OR_B32_e64_50]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_16]], [[V_OR_B32_e64_51]], implicit $exec + ; CHECK-NEXT: 
[[V_OR_B32_e64_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_22]], [[V_OR_B32_e64_52]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_17]], [[V_OR_B32_e64_53]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_18]], [[V_OR_B32_e64_54]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_19]], [[V_OR_B32_e64_55]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_20]], [[V_OR_B32_e64_56]], implicit $exec + ; CHECK-NEXT: [[V_OR_B32_e64_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_21]], [[V_OR_B32_e64_57]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -555, [[BUFFER_LOAD_FORMAT_X_IDXEN23]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -556, [[BUFFER_LOAD_FORMAT_X_IDXEN24]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_59:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_58]], [[V_ADD_U32_e64_23]], implicit $exec @@ -351,13 +350,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec - ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %543:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4) ; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], 
[[V_ADD_U32_e64_30]], implicit $exec - ; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc - ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec + ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc + ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_23]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec ; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) + ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vgpr_32, undef %558:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 21f0c008366a9..0fdc1a83dddbd 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -2029,10 +2029,10 @@ define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind { ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-W64-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-W64-NEXT: s_wqm_b64 exec, exec +; GFX9-W64-NEXT: s_mov_b32 s2, 0 ; GFX9-W64-NEXT: buffer_store_dword v1, off, s[8:11], 0 ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) -; GFX9-W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-W64-NEXT: v_lshl_add_u32 v1, v2, 2, v1 +; GFX9-W64-NEXT: v_lshl_add_u32 v1, v2, 2, s2 ; 
GFX9-W64-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll index b24ea9ec1561e..3c617f9854761 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll @@ -32,9 +32,12 @@ define void @constant_fold_barrier_i128(ptr %p) { ; RV32-NEXT: mv a6, a1 ; RV32-NEXT: seqz a7, a1 ; RV32-NEXT: and a1, a7, a1 +; RV32-NEXT: mv a1, a1 ; RV32-NEXT: mv a7, a1 ; RV32-NEXT: seqz a3, a1 ; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: mv a1, a1 +; RV32-NEXT: mv a1, a1 ; RV32-NEXT: sw a2, 0(a0) ; RV32-NEXT: sw a6, 4(a0) ; RV32-NEXT: sw a7, 8(a0) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll index 225ceed9627b7..5f61ee2d02d24 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll @@ -103,15 +103,18 @@ define i64 @udiv64_constant_no_add(i64 %a) nounwind { ; RV32-NEXT: mulhu a1, a1, a2 ; RV32-NEXT: add a5, a5, a6 ; RV32-NEXT: mv t0, t1 +; RV32-NEXT: mv a1, a1 ; RV32-NEXT: sltu a4, a5, a6 ; RV32-NEXT: add a5, a5, a7 ; RV32-NEXT: sltu a6, t1, t1 ; RV32-NEXT: sltiu t1, t1, 0 ; RV32-NEXT: add t0, t0, t2 +; RV32-NEXT: mv a1, a1 ; RV32-NEXT: sltu a2, a5, a7 ; RV32-NEXT: add a6, a6, t1 ; RV32-NEXT: sltu a5, t0, t2 ; RV32-NEXT: add t0, t0, a0 +; RV32-NEXT: mv a1, a1 ; RV32-NEXT: add a2, a4, a2 ; RV32-NEXT: add a5, a6, a5 ; RV32-NEXT: sltu a0, t0, a0 @@ -155,6 +158,7 @@ define i64 @udiv64_constant_add(i64 %a) nounwind { ; RV32-NEXT: mulhu a7, a0, a2 ; RV32-NEXT: mulhu t2, a1, a3 ; RV32-NEXT: mv t1, t2 +; RV32-NEXT: mv t1, t1 ; RV32-NEXT: mul t2, a1, a3 ; RV32-NEXT: mulhu a2, a1, a2 ; RV32-NEXT: mulhu a3, a0, a3 diff --git a/llvm/test/CodeGen/RISCV/machine-copyprop-noop-removal.mir 
b/llvm/test/CodeGen/RISCV/machine-copyprop-noop-removal.mir index d739537b50d05..293b15bf9d25e 100644 --- a/llvm/test/CodeGen/RISCV/machine-copyprop-noop-removal.mir +++ b/llvm/test/CodeGen/RISCV/machine-copyprop-noop-removal.mir @@ -1,8 +1,11 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -o - %s -mtriple=riscv64 -run-pass=machine-cp -mcp-use-is-copy-instr | FileCheck %s -## This test was added to capture a case where MachineCopyPropagation risks -## leaving a no-op register move (add, x0, reg). +## This test was added to capture a case where MachineCopyPropagation may +## leave a no-op register move (add reg, x0, reg). +## Due to the bug reported in +## , we are not currently +## able to optimize this case. --- name: ham @@ -21,6 +24,7 @@ body: | ; CHECK-NEXT: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x11 = ADDI $x0, 0 + ; CHECK-NEXT: renamable $x10 = ADDI killed renamable $x10, 0 ; CHECK-NEXT: BEQ renamable $x10, $x0, %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index b155feab9b4d9..9f326280885b5 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -1352,6 +1352,7 @@ define signext i32 @sextw_sh2add(i1 zeroext %0, ptr %1, i32 signext %2, i32 sign ; NOREMOVAL-LABEL: sextw_sh2add: ; NOREMOVAL: # %bb.0: ; NOREMOVAL-NEXT: sh2add a2, a2, a3 +; NOREMOVAL-NEXT: mv a2, a2 ; NOREMOVAL-NEXT: beqz a0, .LBB22_2 ; NOREMOVAL-NEXT: # %bb.1: ; NOREMOVAL-NEXT: sw a2, 0(a1) diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index 324a0c49fb413..d698fad745dfb 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -1992,6 +1992,389 @@ define <8 x i16> @avgr_u_v8i16_zext(<8 x i16> %x, <8 x i16> %y) { %c.trunc = trunc <8 x i32> %c to <8 x i16> ret <8 x i16> 
%c.trunc } +define void @avgr_undef_shuffle_lanes(ptr %res, <8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { +; SIMD128-LABEL: avgr_undef_shuffle_lanes: +; SIMD128: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> () +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.avgr_u $push1=, $1, $2 +; SIMD128-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 +; SIMD128-NEXT: local.tee $push11=, $2=, $pop12 +; SIMD128-NEXT: i8x16.avgr_u $push0=, $3, $4 +; SIMD128-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 +; SIMD128-NEXT: local.tee $push9=, $4=, $pop10 +; SIMD128-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 +; SIMD128-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 255, 255 +; SIMD128-NEXT: local.tee $push7=, $3=, $pop8 +; SIMD128-NEXT: v128.and $push5=, $pop4, $pop7 +; SIMD128-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 +; SIMD128-NEXT: v128.and $push3=, $pop2, $3 +; SIMD128-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3 +; SIMD128-NEXT: v128.store 0($0):p2align=0, $pop6 +; SIMD128-NEXT: return +; +; SIMD128-FAST-LABEL: avgr_undef_shuffle_lanes: +; SIMD128-FAST: .functype avgr_undef_shuffle_lanes (i32, v128, v128, v128, v128) -> () +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.avgr_u $push1=, $1, $2 +; SIMD128-FAST-NEXT: i8x16.shuffle $push12=, $pop1, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 +; SIMD128-FAST-NEXT: local.tee $push11=, $2=, $pop12 +; SIMD128-FAST-NEXT: i8x16.avgr_u $push0=, $3, $4 +; SIMD128-FAST-NEXT: i8x16.shuffle $push10=, $pop0, $4, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0 +; SIMD128-FAST-NEXT: local.tee $push9=, $4=, $pop10 +; SIMD128-FAST-NEXT: i8x16.shuffle $push4=, $pop11, $pop9, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 +; SIMD128-FAST-NEXT: v128.const $push8=, 255, 255, 255, 255, 255, 255, 
255, 255 +; SIMD128-FAST-NEXT: local.tee $push7=, $3=, $pop8 +; SIMD128-FAST-NEXT: v128.and $push5=, $pop4, $pop7 +; SIMD128-FAST-NEXT: i8x16.shuffle $push2=, $2, $4, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $3 +; SIMD128-FAST-NEXT: i8x16.narrow_i16x8_u $push6=, $pop5, $pop3 +; SIMD128-FAST-NEXT: v128.store 0($0):p2align=0, $pop6 +; SIMD128-FAST-NEXT: return +; +; NO-SIMD128-LABEL: avgr_undef_shuffle_lanes: +; NO-SIMD128: .functype avgr_undef_shuffle_lanes (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push2=, $24, $pop0 +; NO-SIMD128-NEXT: i32.const $push143=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop143 +; NO-SIMD128-NEXT: i32.add $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.const $push4=, 1 +; NO-SIMD128-NEXT: i32.add $push5=, $pop3, $pop4 +; NO-SIMD128-NEXT: i32.const $push142=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop142 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push141=, 255 +; NO-SIMD128-NEXT: i32.and $push8=, $8, $pop141 +; NO-SIMD128-NEXT: i32.const $push140=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $16, $pop140 +; NO-SIMD128-NEXT: i32.add $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.const $push139=, 1 +; NO-SIMD128-NEXT: i32.add $push10=, $pop9, $pop139 +; NO-SIMD128-NEXT: i32.const $push138=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push11=, $pop10, $pop138 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push137=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $23, $pop137 +; NO-SIMD128-NEXT: i32.const $push136=, 255 +; NO-SIMD128-NEXT: i32.and $push12=, $31, $pop136 +; NO-SIMD128-NEXT: i32.add $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.const $push135=, 1 +; NO-SIMD128-NEXT: i32.add 
$push15=, $pop14, $pop135 +; NO-SIMD128-NEXT: i32.const $push134=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $pop134 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push133=, 255 +; NO-SIMD128-NEXT: i32.and $push18=, $7, $pop133 +; NO-SIMD128-NEXT: i32.const $push132=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $15, $pop132 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.const $push131=, 1 +; NO-SIMD128-NEXT: i32.add $push20=, $pop19, $pop131 +; NO-SIMD128-NEXT: i32.const $push130=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop130 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push129=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $22, $pop129 +; NO-SIMD128-NEXT: i32.const $push128=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $30, $pop128 +; NO-SIMD128-NEXT: i32.add $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.const $push127=, 1 +; NO-SIMD128-NEXT: i32.add $push25=, $pop24, $pop127 +; NO-SIMD128-NEXT: i32.const $push126=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push26=, $pop25, $pop126 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push125=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $6, $pop125 +; NO-SIMD128-NEXT: i32.const $push124=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $14, $pop124 +; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop27 +; NO-SIMD128-NEXT: i32.const $push123=, 1 +; NO-SIMD128-NEXT: i32.add $push30=, $pop29, $pop123 +; NO-SIMD128-NEXT: i32.const $push122=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop122 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop31 +; NO-SIMD128-NEXT: i32.const $push121=, 255 +; NO-SIMD128-NEXT: i32.and $push33=, $21, $pop121 +; NO-SIMD128-NEXT: i32.const $push120=, 255 +; NO-SIMD128-NEXT: i32.and $push32=, $29, $pop120 +; NO-SIMD128-NEXT: i32.add $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.const $push119=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop119 +; NO-SIMD128-NEXT: 
i32.const $push118=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $pop118 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push117=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $5, $pop117 +; NO-SIMD128-NEXT: i32.const $push116=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $13, $pop116 +; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.const $push115=, 1 +; NO-SIMD128-NEXT: i32.add $push40=, $pop39, $pop115 +; NO-SIMD128-NEXT: i32.const $push114=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop114 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop41 +; NO-SIMD128-NEXT: i32.const $push113=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $20, $pop113 +; NO-SIMD128-NEXT: i32.const $push112=, 255 +; NO-SIMD128-NEXT: i32.and $push42=, $28, $pop112 +; NO-SIMD128-NEXT: i32.add $push44=, $pop43, $pop42 +; NO-SIMD128-NEXT: i32.const $push111=, 1 +; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop111 +; NO-SIMD128-NEXT: i32.const $push110=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push46=, $pop45, $pop110 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop46 +; NO-SIMD128-NEXT: i32.const $push109=, 255 +; NO-SIMD128-NEXT: i32.and $push48=, $4, $pop109 +; NO-SIMD128-NEXT: i32.const $push108=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $12, $pop108 +; NO-SIMD128-NEXT: i32.add $push49=, $pop48, $pop47 +; NO-SIMD128-NEXT: i32.const $push107=, 1 +; NO-SIMD128-NEXT: i32.add $push50=, $pop49, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push51=, $pop50, $pop106 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop51 +; NO-SIMD128-NEXT: i32.const $push105=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop105 +; NO-SIMD128-NEXT: i32.const $push104=, 255 +; NO-SIMD128-NEXT: i32.and $push52=, $27, $pop104 +; NO-SIMD128-NEXT: i32.add $push54=, $pop53, $pop52 +; NO-SIMD128-NEXT: i32.const $push103=, 1 +; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop103 +; NO-SIMD128-NEXT: i32.const $push102=, 1 +; NO-SIMD128-NEXT: i32.shr_u 
$push56=, $pop55, $pop102 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop56 +; NO-SIMD128-NEXT: i32.const $push101=, 255 +; NO-SIMD128-NEXT: i32.and $push58=, $3, $pop101 +; NO-SIMD128-NEXT: i32.const $push100=, 255 +; NO-SIMD128-NEXT: i32.and $push57=, $11, $pop100 +; NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.const $push99=, 1 +; NO-SIMD128-NEXT: i32.add $push60=, $pop59, $pop99 +; NO-SIMD128-NEXT: i32.const $push98=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop98 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop61 +; NO-SIMD128-NEXT: i32.const $push97=, 255 +; NO-SIMD128-NEXT: i32.and $push63=, $18, $pop97 +; NO-SIMD128-NEXT: i32.const $push96=, 255 +; NO-SIMD128-NEXT: i32.and $push62=, $26, $pop96 +; NO-SIMD128-NEXT: i32.add $push64=, $pop63, $pop62 +; NO-SIMD128-NEXT: i32.const $push95=, 1 +; NO-SIMD128-NEXT: i32.add $push65=, $pop64, $pop95 +; NO-SIMD128-NEXT: i32.const $push94=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push66=, $pop65, $pop94 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop66 +; NO-SIMD128-NEXT: i32.const $push93=, 255 +; NO-SIMD128-NEXT: i32.and $push68=, $2, $pop93 +; NO-SIMD128-NEXT: i32.const $push92=, 255 +; NO-SIMD128-NEXT: i32.and $push67=, $10, $pop92 +; NO-SIMD128-NEXT: i32.add $push69=, $pop68, $pop67 +; NO-SIMD128-NEXT: i32.const $push91=, 1 +; NO-SIMD128-NEXT: i32.add $push70=, $pop69, $pop91 +; NO-SIMD128-NEXT: i32.const $push90=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push71=, $pop70, $pop90 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop71 +; NO-SIMD128-NEXT: i32.const $push89=, 255 +; NO-SIMD128-NEXT: i32.and $push73=, $17, $pop89 +; NO-SIMD128-NEXT: i32.const $push88=, 255 +; NO-SIMD128-NEXT: i32.and $push72=, $25, $pop88 +; NO-SIMD128-NEXT: i32.add $push74=, $pop73, $pop72 +; NO-SIMD128-NEXT: i32.const $push87=, 1 +; NO-SIMD128-NEXT: i32.add $push75=, $pop74, $pop87 +; NO-SIMD128-NEXT: i32.const $push86=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push76=, $pop75, $pop86 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop76 +; 
NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push78=, $1, $pop85 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push77=, $9, $pop84 +; NO-SIMD128-NEXT: i32.add $push79=, $pop78, $pop77 +; NO-SIMD128-NEXT: i32.const $push83=, 1 +; NO-SIMD128-NEXT: i32.add $push80=, $pop79, $pop83 +; NO-SIMD128-NEXT: i32.const $push82=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push81=, $pop80, $pop82 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop81 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: avgr_undef_shuffle_lanes: +; NO-SIMD128-FAST: .functype avgr_undef_shuffle_lanes (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push143=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop143 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $pop3, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push142=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop142 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push141=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $1, $pop141 +; NO-SIMD128-FAST-NEXT: i32.const $push140=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $9, $pop140 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push139=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $pop9, $pop139 +; NO-SIMD128-FAST-NEXT: i32.const $push138=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push11=, $pop10, $pop138 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push137=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $18, $pop137 +; NO-SIMD128-FAST-NEXT: 
i32.const $push136=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $26, $pop136 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push135=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop135 +; NO-SIMD128-FAST-NEXT: i32.const $push134=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $pop134 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push133=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $2, $pop133 +; NO-SIMD128-FAST-NEXT: i32.const $push132=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $10, $pop132 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push131=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $pop19, $pop131 +; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop130 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push129=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $19, $pop129 +; NO-SIMD128-FAST-NEXT: i32.const $push128=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $27, $pop128 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $pop24, $pop127 +; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $pop126 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push125=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $3, $pop125 +; NO-SIMD128-FAST-NEXT: i32.const $push124=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $11, $pop124 +; NO-SIMD128-FAST-NEXT: i32.add $push29=, $pop28, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $pop29, $pop123 +; NO-SIMD128-FAST-NEXT: i32.const $push122=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push31=, $pop30, $pop122 +; NO-SIMD128-FAST-NEXT: i32.store8 
4($0), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push121=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $20, $pop121 +; NO-SIMD128-FAST-NEXT: i32.const $push120=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $28, $pop120 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $pop33, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push119=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop119 +; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $pop118 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $4, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $12, $pop116 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $pop39, $pop115 +; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop114 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $21, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $29, $pop112 +; NO-SIMD128-FAST-NEXT: i32.add $push44=, $pop43, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push45=, $pop44, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push46=, $pop45, $pop110 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push48=, $5, $pop109 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $13, $pop108 +; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $pop49, $pop107 +; 
NO-SIMD128-FAST-NEXT: i32.const $push106=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $22, $pop105 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $30, $pop104 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $pop53, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop103 +; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push56=, $pop55, $pop102 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $6, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $14, $pop100 +; NO-SIMD128-FAST-NEXT: i32.add $push59=, $pop58, $pop57 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push60=, $pop59, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop98 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop61 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $23, $pop97 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $31, $pop96 +; NO-SIMD128-FAST-NEXT: i32.add $push64=, $pop63, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push65=, $pop64, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push66=, $pop65, $pop94 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push68=, $7, $pop93 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push67=, $15, $pop92 +; NO-SIMD128-FAST-NEXT: i32.add 
$push69=, $pop68, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push70=, $pop69, $pop91 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push71=, $pop70, $pop90 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop71 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push73=, $24, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push72=, $32, $pop88 +; NO-SIMD128-FAST-NEXT: i32.add $push74=, $pop73, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push75=, $pop74, $pop87 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push76=, $pop75, $pop86 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push78=, $8, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push77=, $16, $pop84 +; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push80=, $pop79, $pop83 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop82 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop81 +; NO-SIMD128-FAST-NEXT: return + %zext.0 = zext <8 x i8> %a to <8 x i16> + %zext.1 = zext <8 x i8> %b to <8 x i16> + %add.0 = add nuw nsw <8 x i16> %zext.0, splat (i16 1) + %add.1 = add nuw nsw <8 x i16> %add.0, %zext.1 + %shift.0 = lshr <8 x i16> %add.1, splat (i16 1) + %zext.2 = zext <8 x i8> %c to <8 x i16> + %zext.3 = zext <8 x i8> %d to <8 x i16> + %add.2 = add nuw nsw <8 x i16> %zext.2, splat (i16 1) + %add.3 = add nuw nsw <8 x i16> %add.2, %zext.3 + %shift.1 = lshr <8 x i16> %add.3, splat (i16 1) + %shuffle = shufflevector <8 x i16> %shift.0, <8 x i16> %shift.1, <16 x i32> + %trunc = trunc nuw <16 x i16> %shuffle to <16 x i8> + store <16 
x i8> %trunc, ptr %res, align 1 + ret void +} define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { ; SIMD128-LABEL: avgr_u_v16i8_wrap: ; SIMD128: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128) diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll index 77053e2c1bc98..4dd883a24f623 100644 --- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll +++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll @@ -255,8 +255,8 @@ define void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf) ; CHECK-LABEL: gather_qps: ; CHECK: ## %bb.0: ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: kxnorw %k0, %k0, %k1 -; CHECK-NEXT: kxnorw %k0, %k0, %k2 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k2 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2} ; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1} @@ -520,7 +520,7 @@ define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, ptr %x1, ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1} -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1} ; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0 @@ -772,7 +772,7 @@ define void@test_int_x86_avx512_scattersiv8_sf(ptr %x0, i8 %x1, <8 x i32> %x2, < ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1} -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -788,7 +788,7 @@ define void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i32> %x2, < ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: 
vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1} -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -800,9 +800,9 @@ define void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i32> %x2, < define void @scatter_mask_test(ptr %x0, <8 x i32> %x2, <8 x i32> %x3) { ; CHECK-LABEL: scatter_mask_test: ; CHECK: ## %bb.0: -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1} -; CHECK-NEXT: kxorw %k0, %k0, %k1 +; CHECK-NEXT: kxorb %k0, %k0, %k1 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1} ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: kmovd %eax, %k1 diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll index df71e3c3afa5e..5ed91ea1eb872 100644 --- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -251,9 +251,9 @@ define dso_local void @scatter_mask_qps_execdomain(<8 x i64> %ind, ptr %src, i8 define dso_local void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf) { ; CHECK-LABEL: gather_qps: ; CHECK: # %bb.0: -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: kxnorw %k0, %k0, %k2 +; CHECK-NEXT: kxnorb %k0, %k0, %k2 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2} ; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1} @@ -523,7 +523,7 @@ define <8 x float> @test_int_x86_avx512_mask_gather3siv8_sf(<8 x float> %x0, ptr ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1} -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1} ; 
CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0 @@ -774,7 +774,7 @@ define dso_local void@test_int_x86_avx512_scattersiv8_sf(ptr %x0, i8 %x1, <8 x i ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1} -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -789,7 +789,7 @@ define dso_local void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1} -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -802,9 +802,9 @@ define dso_local void@test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i define dso_local void @scatter_mask_test(ptr %x0, <8 x i32> %x2, <8 x i32> %x3) { ; CHECK-LABEL: scatter_mask_test: ; CHECK: # %bb.0: -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1} -; CHECK-NEXT: kxorw %k0, %k0, %k1 +; CHECK-NEXT: kxorb %k0, %k0, %k1 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1} ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: kmovd %eax, %k1 @@ -856,7 +856,7 @@ define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, ptr %b define <8 x float> @gather_global(<8 x i64>, ptr nocapture readnone) { ; CHECK-LABEL: gather_global: ; CHECK: # %bb.0: -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vgatherqps x(,%zmm0,4), %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 diff --git a/llvm/test/CodeGen/X86/avx512-mask-set-opt.ll b/llvm/test/CodeGen/X86/avx512-mask-set-opt.ll new file mode 100644 index 0000000000000..ca5f3192d7b97 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512-mask-set-opt.ll @@ -0,0 +1,229 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQBW + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) +declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) +declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) + +; Test case 1: Direct v8i1 all-ones mask (should use kxnorb on AVX512DQ) +define <8 x float> @mask_v8i1_allones(ptr %ptr) { +; AVX512F-LABEL: mask_v8i1_allones: +; AVX512F: # %bb.0: +; AVX512F-NEXT: movw $255, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: mask_v8i1_allones: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: kxnorb %k0, %k0, %k1 +; AVX512DQ-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: mask_v8i1_allones: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: movw $255, %ax +; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQBW-LABEL: mask_v8i1_allones: +; AVX512DQBW: # %bb.0: +; AVX512DQBW-NEXT: kxnorb %k0, %k0, %k1 +; AVX512DQBW-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512DQBW-NEXT: # kill: def $ymm0 killed 
$ymm0 killed $zmm0 +; AVX512DQBW-NEXT: retq + %res = call <8 x float> @llvm.masked.expandload.v8f32(ptr %ptr, <8 x i1> , <8 x float> zeroinitializer) + ret <8 x float> %res +} + +; Test case 2: v16i1 with lower 8 bits set via bitconvert (should use kxnorb on AVX512DQ) +define <16 x float> @mask_v16i1_lower8(ptr %ptr) { +; AVX512F-LABEL: mask_v16i1_lower8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: movw $255, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: mask_v16i1_lower8: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: kxnorb %k0, %k0, %k1 +; AVX512DQ-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: mask_v16i1_lower8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: movw $255, %ax +; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: retq +; +; AVX512DQBW-LABEL: mask_v16i1_lower8: +; AVX512DQBW: # %bb.0: +; AVX512DQBW-NEXT: kxnorb %k0, %k0, %k1 +; AVX512DQBW-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; AVX512DQBW-NEXT: retq + %res = call <16 x float> @llvm.masked.expandload.v16f32(ptr %ptr, <16 x i1> , <16 x float> zeroinitializer) + ret <16 x float> %res +} + +; Test case 3: v16i1 with all bits set (should use kxnorw on all targets) +define <16 x float> @gather_all(ptr %base, <16 x i32> %ind, i16 %mask) { +; AVX512-LABEL: gather_all: +; AVX512: # %bb.0: +; AVX512-NEXT: kxnorw %k0, %k0, %k1 +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} +; AVX512-NEXT: vmovaps %zmm1, %zmm0 +; AVX512-NEXT: retq + %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind + %res = call <16 x float> 
@llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> poison) + ret <16 x float> %res +} + +; Test case 4: v8i1 with lower 8 bits set in gather (should use kxnorb on AVX512DQ targets) +define <16 x float> @gather_lower(ptr %base, <16 x i32> %ind, i16 %mask) { +; AVX512F-LABEL: gather_lower: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: movw $255, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} +; AVX512F-NEXT: vmovaps %zmm1, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: gather_lower: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: kxnorb %k0, %k0, %k1 +; AVX512DQ-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} +; AVX512DQ-NEXT: vmovaps %zmm1, %zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: gather_lower: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: movw $255, %ax +; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} +; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQBW-LABEL: gather_lower: +; AVX512DQBW: # %bb.0: +; AVX512DQBW-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQBW-NEXT: kxnorb %k0, %k0, %k1 +; AVX512DQBW-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} +; AVX512DQBW-NEXT: vmovaps %zmm1, %zmm0 +; AVX512DQBW-NEXT: retq + %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> poison) + ret <16 x float> %res +} + +; Test case 5: v32i1 mask via bitconvert combined with dynamic condition. 
+; Ensures lower 16 lanes force the KSET1W path without folding into a shuffle. +define <32 x i16> @mask_v32i1_lower16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) { +; AVX512F-LABEL: mask_v32i1_lower16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm3 +; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm2 +; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm2 & (zmm0 ^ zmm1)) +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: mask_v32i1_lower16: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm3 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm2 +; AVX512DQ-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm2 & (zmm0 ^ zmm1)) +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: mask_v32i1_lower16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: movl $65535, %eax # imm = 0xFFFF +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: kord %k0, %k1, %k1 +; AVX512BW-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: retq +; +; AVX512DQBW-LABEL: mask_v32i1_lower16: +; AVX512DQBW: # %bb.0: +; AVX512DQBW-NEXT: kxnorw %k0, %k0, %k0 +; AVX512DQBW-NEXT: vpcmpgtw %zmm3, %zmm2, %k1 +; AVX512DQBW-NEXT: kord %k0, %k1, %k1 +; AVX512DQBW-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; AVX512DQBW-NEXT: retq + %mask0 = bitcast i32 65535 to <32 x i1> + %mask1 = icmp sgt <32 x i16> %c, %d + %mask = or <32 x i1> %mask0, %mask1 + %res = select <32 x i1> %mask, <32 x i16> %a, <32 x i16> %b + ret <32 x i16> %res +} + +; Test case 6: v64i1 mask via bitconvert combined with dynamic condition. +; Verifies the KSET1D submask pattern survives past SelectionDAG combines. 
+define <64 x i8> @mask_v64i1_lower32(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) { +; AVX512F-LABEL: mask_v64i1_lower32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm3 +; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm2 +; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm2 & (zmm0 ^ zmm1)) +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: mask_v64i1_lower32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm3 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm2 +; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm2 & (zmm0 ^ zmm1)) +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: mask_v64i1_lower32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; AVX512BW-NEXT: kmovq %rax, %k0 +; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: korq %k0, %k1, %k1 +; AVX512BW-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: retq +; +; AVX512DQBW-LABEL: mask_v64i1_lower32: +; AVX512DQBW: # %bb.0: +; AVX512DQBW-NEXT: kxnord %k0, %k0, %k0 +; AVX512DQBW-NEXT: vpcmpgtb %zmm3, %zmm2, %k1 +; AVX512DQBW-NEXT: korq %k0, %k1, %k1 +; AVX512DQBW-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} +; AVX512DQBW-NEXT: retq + %mask0 = bitcast i64 4294967295 to <64 x i1> + %mask1 = icmp sgt <64 x i8> %c, %d + %mask = or <64 x i1> %mask0, %mask1 + %res = select <64 x i1> %mask, <64 x i8> %a, <64 x i8> %b + ret <64 x i8> %res +} + diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index e9e9ee9c97593..9b7569ff8b29f 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -967,82 +967,63 @@ define <8 x i16> 
@complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $80, %esp -; X86-NEXT: movzbl 16(%ebp), %ecx -; X86-NEXT: movl 12(%ebp), %edx -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $1, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: shrb $3, %al -; X86-NEXT: andb $12, %al -; X86-NEXT: negb %al -; X86-NEXT: movsbl %al, %eax -; X86-NEXT: movl 56(%esp,%eax), %esi -; X86-NEXT: movl 60(%esp,%eax), %ebx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: shldl %cl, %esi, %ebx -; X86-NEXT: movzwl 14(%edx), %edi +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movzwl (%eax), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzwl 12(%eax), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzwl 14(%eax), %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shll $16, %edi -; X86-NEXT: movzwl 12(%edx), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: orl %ecx, %edi -; X86-NEXT: xorl %ebx, %edi -; X86-NEXT: movl 52(%esp,%eax), %edx -; X86-NEXT: movzbl 16(%ebp), %ecx -; X86-NEXT: shldl %cl, %edx, %esi -; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: movzwl 10(%eax), %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $16, %ebx -; X86-NEXT: movzwl 8(%eax), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %eax, %ebx -; X86-NEXT: xorl %esi, %ebx -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: movl 48(%esp,%eax), %esi -; X86-NEXT: shldl %cl, %esi, %edx -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movzwl 6(%ecx), %eax -; X86-NEXT: movl %eax, (%esp) # 
4-byte Spill -; X86-NEXT: shll $16, %eax -; X86-NEXT: movzwl 4(%ecx), %ecx +; X86-NEXT: movzwl 2(%eax), %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: xorl %edx, %eax -; X86-NEXT: movzbl 16(%ebp), %ecx -; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movzwl 2(%ecx), %edx +; X86-NEXT: movzwl 4(%eax), %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $16, %edx -; X86-NEXT: movzwl (%ecx), %ecx +; X86-NEXT: movzwl 6(%eax), %esi +; X86-NEXT: movzwl 8(%eax), %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: xorl %esi, %edx +; X86-NEXT: movzwl 10(%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: shll $16, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: shll $16, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shll $16, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: andb $96, %bl +; X86-NEXT: shrb $3, %bl +; X86-NEXT: movzbl %bl, %edi +; X86-NEXT: movl 32(%esp,%edi), %edi +; X86-NEXT: btcl %eax, %edi +; X86-NEXT: andl $96, %eax +; X86-NEXT: shrl $3, %eax ; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %edi, 12(%ecx) -; X86-NEXT: movl %ebx, 8(%ecx) -; X86-NEXT: movl %eax, 4(%ecx) -; X86-NEXT: movl %edx, (%ecx) +; X86-NEXT: movl %edi, (%ecx,%eax) ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: 
movw %dx, 14(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movw %dx, 12(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movw %dx, 10(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movw %dx, 8(%eax) -; X86-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NEXT: movw %dx, 6(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movw %dx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 10(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 8(%eax) +; X86-NEXT: movw %si, 6(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 4(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movw %cx, 2(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -1056,81 +1037,57 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind ; ; SSE2-LABEL: complement_ne_i128_bitcast: ; SSE2: # %bb.0: -; SSE2-NEXT: movl %esi, %ecx -; SSE2-NEXT: movl $1, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: shldq %cl, %rax, %rdx -; SSE2-NEXT: xorl %esi, %esi -; SSE2-NEXT: shlq %cl, %rax -; SSE2-NEXT: testb $64, %cl -; SSE2-NEXT: cmovneq %rax, %rdx -; SSE2-NEXT: cmovneq %rsi, %rax +; SSE2-NEXT: # kill: def $esi killed $esi def $rsi ; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: xorq %rdx, 8(%rdi) -; SSE2-NEXT: movq %xmm0, %rcx -; SSE2-NEXT: xorq %rax, %rcx -; SSE2-NEXT: movq %rcx, (%rdi) +; SSE2-NEXT: movq 8(%rdi), %rax +; SSE2-NEXT: movq %xmm0, %rdx +; SSE2-NEXT: movl %esi, %ecx +; SSE2-NEXT: andb $32, %cl +; SSE2-NEXT: shrdq %cl, %rax, %rdx +; SSE2-NEXT: shrq %cl, %rax +; SSE2-NEXT: testb $64, %sil +; SSE2-NEXT: cmoveq %rdx, %rax +; SSE2-NEXT: btcl %esi, %eax +; SSE2-NEXT: andl $96, %esi +; SSE2-NEXT: shrl $3, %esi +; SSE2-NEXT: movl %eax, (%rdi,%rsi) ; SSE2-NEXT: 
retq ; ; SSE4-LABEL: complement_ne_i128_bitcast: ; SSE4: # %bb.0: -; SSE4-NEXT: movl %esi, %ecx -; SSE4-NEXT: movl $1, %eax -; SSE4-NEXT: xorl %edx, %edx -; SSE4-NEXT: shldq %cl, %rax, %rdx -; SSE4-NEXT: shlq %cl, %rax -; SSE4-NEXT: xorl %esi, %esi -; SSE4-NEXT: testb $64, %cl -; SSE4-NEXT: cmovneq %rax, %rdx -; SSE4-NEXT: cmovneq %rsi, %rax +; SSE4-NEXT: # kill: def $esi killed $esi def $rsi ; SSE4-NEXT: movdqa (%rdi), %xmm0 -; SSE4-NEXT: movq %xmm0, %rcx -; SSE4-NEXT: xorq %rax, %rcx ; SSE4-NEXT: pextrq $1, %xmm0, %rax -; SSE4-NEXT: xorq %rdx, %rax -; SSE4-NEXT: movq %rax, 8(%rdi) -; SSE4-NEXT: movq %rcx, (%rdi) +; SSE4-NEXT: movq %xmm0, %rdx +; SSE4-NEXT: movl %esi, %ecx +; SSE4-NEXT: andb $32, %cl +; SSE4-NEXT: shrdq %cl, %rax, %rdx +; SSE4-NEXT: shrq %cl, %rax +; SSE4-NEXT: testb $64, %sil +; SSE4-NEXT: cmoveq %rdx, %rax +; SSE4-NEXT: btcl %esi, %eax +; SSE4-NEXT: andl $96, %esi +; SSE4-NEXT: shrl $3, %esi +; SSE4-NEXT: movl %eax, (%rdi,%rsi) ; SSE4-NEXT: retq ; -; AVX2-LABEL: complement_ne_i128_bitcast: -; AVX2: # %bb.0: -; AVX2-NEXT: movl %esi, %ecx -; AVX2-NEXT: movl $1, %eax -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: shldq %cl, %rax, %rdx -; AVX2-NEXT: xorl %esi, %esi -; AVX2-NEXT: shlxq %rcx, %rax, %rax -; AVX2-NEXT: testb $64, %cl -; AVX2-NEXT: cmovneq %rax, %rdx -; AVX2-NEXT: cmovneq %rsi, %rax -; AVX2-NEXT: vmovdqa (%rdi), %xmm0 -; AVX2-NEXT: vmovq %xmm0, %rcx -; AVX2-NEXT: vpextrq $1, %xmm0, %rsi -; AVX2-NEXT: xorq %rax, %rcx -; AVX2-NEXT: xorq %rdx, %rsi -; AVX2-NEXT: movq %rsi, 8(%rdi) -; AVX2-NEXT: movq %rcx, (%rdi) -; AVX2-NEXT: retq -; -; AVX512-LABEL: complement_ne_i128_bitcast: -; AVX512: # %bb.0: -; AVX512-NEXT: movl %esi, %ecx -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: movl $1, %edx -; AVX512-NEXT: xorl %esi, %esi -; AVX512-NEXT: shldq %cl, %rdx, %rsi -; AVX512-NEXT: shlxq %rcx, %rdx, %rdx -; AVX512-NEXT: testb $64, %cl -; AVX512-NEXT: cmovneq %rdx, %rsi -; AVX512-NEXT: cmovneq %rax, %rdx -; AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; 
AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: xorq %rdx, %rax -; AVX512-NEXT: vpextrq $1, %xmm0, %rcx -; AVX512-NEXT: xorq %rsi, %rcx -; AVX512-NEXT: movq %rcx, 8(%rdi) -; AVX512-NEXT: movq %rax, (%rdi) -; AVX512-NEXT: retq +; AVX-LABEL: complement_ne_i128_bitcast: +; AVX: # %bb.0: +; AVX-NEXT: # kill: def $esi killed $esi def $rsi +; AVX-NEXT: vmovdqa (%rdi), %xmm0 +; AVX-NEXT: vpextrq $1, %xmm0, %rax +; AVX-NEXT: vmovq %xmm0, %rdx +; AVX-NEXT: movl %esi, %ecx +; AVX-NEXT: andb $32, %cl +; AVX-NEXT: shrdq %cl, %rax, %rdx +; AVX-NEXT: shrxq %rcx, %rax, %rax +; AVX-NEXT: testb $64, %sil +; AVX-NEXT: cmoveq %rdx, %rax +; AVX-NEXT: btcl %esi, %eax +; AVX-NEXT: andl $96, %esi +; AVX-NEXT: shrl $3, %esi +; AVX-NEXT: movl %eax, (%rdi,%rsi) +; AVX-NEXT: retq %rem = and i32 %position, 127 %ofs = zext nneg i32 %rem to i128 %bit = shl nuw i128 1, %ofs diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 2f691e7ca8f5b..58adbb767ed87 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -207,15 +207,15 @@ declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> , <16 x ptr> , i32 , < ; SCALAR-NEXT: store i32 %Elt2, ptr %Ptr23, align 4 define <8 x i32> @test6(<8 x i32>%a1, <8 x ptr> %ptr) { -; X64-LABEL: test6: -; X64: # %bb.0: -; X64-NEXT: kxnorw %k0, %k0, %k1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: kxnorw %k0, %k0, %k2 -; X64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} -; X64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; X64-NEXT: vmovdqa %ymm2, %ymm0 -; X64-NEXT: retq +; X64-KNL-LABEL: test6: +; X64-KNL: # %bb.0: +; X64-KNL-NEXT: kxnorw %k0, %k0, %k1 +; X64-KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-KNL-NEXT: kxnorw %k0, %k0, %k2 +; X64-KNL-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} +; X64-KNL-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} +; X64-KNL-NEXT: vmovdqa %ymm2, %ymm0 +; X64-KNL-NEXT: retq ; ; X86-KNL-LABEL: test6: ; X86-KNL: # %bb.0: @@ -230,11 
+230,21 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x ptr> %ptr) { ; X86-KNL-NEXT: vmovdqa %ymm2, %ymm0 ; X86-KNL-NEXT: retl ; +; X64-SKX-LABEL: test6: +; X64-SKX: # %bb.0: +; X64-SKX-NEXT: kxnorb %k0, %k0, %k1 +; X64-SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-SKX-NEXT: kxnorb %k0, %k0, %k2 +; X64-SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} +; X64-SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} +; X64-SKX-NEXT: vmovdqa %ymm2, %ymm0 +; X64-SKX-NEXT: retq +; ; X86-SKX-LABEL: test6: ; X86-SKX: # %bb.0: -; X86-SKX-NEXT: kxnorw %k0, %k0, %k1 +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 ; X86-SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X86-SKX-NEXT: kxnorw %k0, %k0, %k2 +; X86-SKX-NEXT: kxnorb %k0, %k0, %k2 ; X86-SKX-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2} ; X86-SKX-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1} ; X86-SKX-NEXT: vmovdqa %ymm2, %ymm0 @@ -397,7 +407,7 @@ define <8 x i32> @test9(ptr %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero ; X64-SKX-SMALL-NEXT: vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1 ; X64-SKX-SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm1 -; X64-SKX-SMALL-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-SMALL-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-SMALL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X64-SKX-SMALL-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1} ; X64-SKX-SMALL-NEXT: retq @@ -412,7 +422,7 @@ define <8 x i32> @test9(ptr %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; X64-SKX-LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 ; X64-SKX-LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; X64-SKX-LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm1 -; X64-SKX-LARGE-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-LARGE-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-LARGE-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X64-SKX-LARGE-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1} ; X64-SKX-LARGE-NEXT: retq @@ -424,7 +434,7 @@ define <8 x i32> @test9(ptr %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; X86-SKX-NEXT: 
vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0 ; X86-SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm1 -; X86-SKX-NEXT: kxnorw %k0, %k0, %k1 +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 ; X86-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X86-SKX-NEXT: vpgatherdd 68(,%ymm1), %ymm0 {%k1} ; X86-SKX-NEXT: retl @@ -481,7 +491,7 @@ define <8 x i32> @test10(ptr %base, <8 x i64> %i1, <8 x i32>%ind5) { ; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero ; X64-SKX-SMALL-NEXT: vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1 ; X64-SKX-SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm1 -; X64-SKX-SMALL-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-SMALL-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-SMALL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X64-SKX-SMALL-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1} ; X64-SKX-SMALL-NEXT: retq @@ -496,7 +506,7 @@ define <8 x i32> @test10(ptr %base, <8 x i64> %i1, <8 x i32>%ind5) { ; X64-SKX-LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 ; X64-SKX-LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; X64-SKX-LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm1 -; X64-SKX-LARGE-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-LARGE-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-LARGE-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X64-SKX-LARGE-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1} ; X64-SKX-LARGE-NEXT: retq @@ -508,7 +518,7 @@ define <8 x i32> @test10(ptr %base, <8 x i64> %i1, <8 x i32>%ind5) { ; X86-SKX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0 ; X86-SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm1 -; X86-SKX-NEXT: kxnorw %k0, %k0, %k1 +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 ; X86-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X86-SKX-NEXT: vpgatherdd 68(,%ymm1), %ymm0 {%k1} ; X86-SKX-NEXT: retl @@ -2465,17 +2475,17 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> declare <16 x ptr> 
@llvm.masked.gather.v16p0.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x ptr>) define <16 x ptr> @test31(<16 x ptr> %ptrs) { -; X64-LABEL: test31: -; X64: # %bb.0: -; X64-NEXT: kxnorw %k0, %k0, %k1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; X64-NEXT: kxnorw %k0, %k0, %k2 -; X64-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k2} -; X64-NEXT: vpgatherqq (,%zmm1), %zmm2 {%k1} -; X64-NEXT: vmovdqa64 %zmm3, %zmm0 -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 -; X64-NEXT: retq +; X64-KNL-LABEL: test31: +; X64-KNL: # %bb.0: +; X64-KNL-NEXT: kxnorw %k0, %k0, %k1 +; X64-KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-KNL-NEXT: kxnorw %k0, %k0, %k2 +; X64-KNL-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k2} +; X64-KNL-NEXT: vpgatherqq (,%zmm1), %zmm2 {%k1} +; X64-KNL-NEXT: vmovdqa64 %zmm3, %zmm0 +; X64-KNL-NEXT: vmovdqa64 %zmm2, %zmm1 +; X64-KNL-NEXT: retq ; ; X86-LABEL: test31: ; X86: # %bb.0: @@ -2484,6 +2494,18 @@ define <16 x ptr> @test31(<16 x ptr> %ptrs) { ; X86-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1} ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ; X86-NEXT: retl +; +; X64-SKX-LABEL: test31: +; X64-SKX: # %bb.0: +; X64-SKX-NEXT: kxnorb %k0, %k0, %k1 +; X64-SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-SKX-NEXT: kxnorb %k0, %k0, %k2 +; X64-SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k2} +; X64-SKX-NEXT: vpgatherqq (,%zmm1), %zmm2 {%k1} +; X64-SKX-NEXT: vmovdqa64 %zmm3, %zmm0 +; X64-SKX-NEXT: vmovdqa64 %zmm2, %zmm1 +; X64-SKX-NEXT: retq %res = call <16 x ptr> @llvm.masked.gather.v16p0.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> , <16 x ptr> undef) ret <16 x ptr>%res } @@ -3253,17 +3275,17 @@ define <8 x i32> @test_global_array(<8 x i64> %indxs) { ; X64-KNL-NEXT: vmovdqa %ymm1, %ymm0 ; X64-KNL-NEXT: retq ; -; X86-LABEL: test_global_array: -; X86: # %bb.0: -; X86-NEXT: kxnorw %k0, %k0, %k1 -; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} -; X86-NEXT: vmovdqa %ymm1, 
%ymm0 -; X86-NEXT: retl +; X86-KNL-LABEL: test_global_array: +; X86-KNL: # %bb.0: +; X86-KNL-NEXT: kxnorw %k0, %k0, %k1 +; X86-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-KNL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} +; X86-KNL-NEXT: vmovdqa %ymm1, %ymm0 +; X86-KNL-NEXT: retl ; ; X64-SKX-SMALL-LABEL: test_global_array: ; X64-SKX-SMALL: # %bb.0: -; X64-SKX-SMALL-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-SMALL-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-SMALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-SKX-SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} ; X64-SKX-SMALL-NEXT: vmovdqa %ymm1, %ymm0 @@ -3272,11 +3294,19 @@ define <8 x i32> @test_global_array(<8 x i64> %indxs) { ; X64-SKX-LARGE-LABEL: test_global_array: ; X64-SKX-LARGE: # %bb.0: ; X64-SKX-LARGE-NEXT: movabsq $glob_array, %rax -; X64-SKX-LARGE-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-LARGE-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-LARGE-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-SKX-LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1} ; X64-SKX-LARGE-NEXT: vmovdqa %ymm1, %ymm0 ; X64-SKX-LARGE-NEXT: retq +; +; X86-SKX-LABEL: test_global_array: +; X86-SKX: # %bb.0: +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 +; X86-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-SKX-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} +; X86-SKX-NEXT: vmovdqa %ymm1, %ymm0 +; X86-SKX-NEXT: retl %p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <8 x i64> %indxs %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %p, i32 8, <8 x i1> , <8 x i32> undef) ret <8 x i32> %g @@ -3291,17 +3321,17 @@ define <8 x i32> @test_global_array_zeroinitializer_index(<8 x i64> %indxs) { ; X64-KNL-NEXT: vmovdqa %ymm1, %ymm0 ; X64-KNL-NEXT: retq ; -; X86-LABEL: test_global_array_zeroinitializer_index: -; X86: # %bb.0: -; X86-NEXT: kxnorw %k0, %k0, %k1 -; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} -; X86-NEXT: vmovdqa %ymm1, %ymm0 -; X86-NEXT: retl +; X86-KNL-LABEL: 
test_global_array_zeroinitializer_index: +; X86-KNL: # %bb.0: +; X86-KNL-NEXT: kxnorw %k0, %k0, %k1 +; X86-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-KNL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} +; X86-KNL-NEXT: vmovdqa %ymm1, %ymm0 +; X86-KNL-NEXT: retl ; ; X64-SKX-SMALL-LABEL: test_global_array_zeroinitializer_index: ; X64-SKX-SMALL: # %bb.0: -; X64-SKX-SMALL-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-SMALL-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-SMALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-SKX-SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} ; X64-SKX-SMALL-NEXT: vmovdqa %ymm1, %ymm0 @@ -3310,11 +3340,19 @@ define <8 x i32> @test_global_array_zeroinitializer_index(<8 x i64> %indxs) { ; X64-SKX-LARGE-LABEL: test_global_array_zeroinitializer_index: ; X64-SKX-LARGE: # %bb.0: ; X64-SKX-LARGE-NEXT: movabsq $glob_array, %rax -; X64-SKX-LARGE-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-LARGE-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-LARGE-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-SKX-LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1} ; X64-SKX-LARGE-NEXT: vmovdqa %ymm1, %ymm0 ; X64-SKX-LARGE-NEXT: retq +; +; X86-SKX-LABEL: test_global_array_zeroinitializer_index: +; X86-SKX: # %bb.0: +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 +; X86-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-SKX-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} +; X86-SKX-NEXT: vmovdqa %ymm1, %ymm0 +; X86-SKX-NEXT: retl %p = getelementptr inbounds [16 x i32], ptr @glob_array, <8 x i64> zeroinitializer, <8 x i64> %indxs %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %p, i32 8, <8 x i1> , <8 x i32> undef) ret <8 x i32> %g @@ -3545,7 +3583,7 @@ define <8 x float> @sext_v8i8_index(ptr %base, <8 x i8> %ind) { ; X64-SKX-LABEL: sext_v8i8_index: ; X64-SKX: # %bb.0: ; X64-SKX-NEXT: vpmovsxbd %xmm0, %ymm1 -; X64-SKX-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X64-SKX-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1} ; X64-SKX-NEXT: retq @@ -3554,7 
+3592,7 @@ define <8 x float> @sext_v8i8_index(ptr %base, <8 x i8> %ind) { ; X86-SKX: # %bb.0: ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SKX-NEXT: vpmovsxbd %xmm0, %ymm1 -; X86-SKX-NEXT: kxnorw %k0, %k0, %k1 +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 ; X86-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X86-SKX-NEXT: vgatherdps (%eax,%ymm1,4), %ymm0 {%k1} ; X86-SKX-NEXT: retl @@ -3617,7 +3655,7 @@ define <8 x float> @zext_v8i8_index(ptr %base, <8 x i8> %ind) { ; X64-SKX-LABEL: zext_v8i8_index: ; X64-SKX: # %bb.0: ; X64-SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; X64-SKX-NEXT: kxnorw %k0, %k0, %k1 +; X64-SKX-NEXT: kxnorb %k0, %k0, %k1 ; X64-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X64-SKX-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1} ; X64-SKX-NEXT: retq @@ -3626,7 +3664,7 @@ define <8 x float> @zext_v8i8_index(ptr %base, <8 x i8> %ind) { ; X86-SKX: # %bb.0: ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; X86-SKX-NEXT: kxnorw %k0, %k0, %k1 +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 ; X86-SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X86-SKX-NEXT: vgatherdps (%eax,%ymm1,4), %ymm0 {%k1} ; X86-SKX-NEXT: retl @@ -4793,19 +4831,19 @@ define <16 x i32> @pr163023_sext(ptr %a0, <16 x i32> %a1) { } define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) { -; X64-LABEL: pr163023_zext: -; X64: # %bb.0: -; X64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero -; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; X64-NEXT: vpmovzxdq {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero -; X64-NEXT: kxnorw %k0, %k0, %k1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; X64-NEXT: kxnorw %k0, %k0, %k2 -; X64-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2} -; X64-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1} -; X64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0 -; X64-NEXT: retq +; X64-KNL-LABEL: pr163023_zext: +; X64-KNL: # %bb.0: +; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-KNL-NEXT: kxnorw %k0, %k0, %k1 +; X64-KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-KNL-NEXT: kxnorw %k0, %k0, %k2 +; X64-KNL-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2} +; X64-KNL-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1} +; X64-KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0 +; X64-KNL-NEXT: retq ; ; X86-LABEL: pr163023_zext: ; X86: # %bb.0: @@ -4815,6 +4853,20 @@ define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) { ; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1} ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ; X86-NEXT: retl +; +; X64-SKX-LABEL: pr163023_zext: +; X64-SKX: # %bb.0: +; X64-SKX-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; X64-SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-SKX-NEXT: kxnorb %k0, %k0, %k1 +; X64-SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-SKX-NEXT: kxnorb %k0, %k0, %k2 +; X64-SKX-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-SKX-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 
{%k2} +; X64-SKX-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1} +; X64-SKX-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0 +; X64-SKX-NEXT: retq %addr.p = ptrtoint ptr %a0 to i64 %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0 %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer @@ -4834,21 +4886,37 @@ define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) { %struct.foo = type { ptr, i64, i16, i16, i32 } define <8 x i64> @pr45906(<8 x ptr> %ptr) { -; X64-LABEL: pr45906: -; X64: # %bb.0: # %bb -; X64-NEXT: kxnorw %k0, %k0, %k1 -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-NEXT: vpgatherqq 8(,%zmm0), %zmm1 {%k1} -; X64-NEXT: vmovdqa64 %zmm1, %zmm0 -; X64-NEXT: retq +; X64-KNL-LABEL: pr45906: +; X64-KNL: # %bb.0: # %bb +; X64-KNL-NEXT: kxnorw %k0, %k0, %k1 +; X64-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-KNL-NEXT: vpgatherqq 8(,%zmm0), %zmm1 {%k1} +; X64-KNL-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-KNL-NEXT: retq ; -; X86-LABEL: pr45906: -; X86: # %bb.0: # %bb -; X86-NEXT: kxnorw %k0, %k0, %k1 -; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86-NEXT: vpgatherdq 4(,%ymm0), %zmm1 {%k1} -; X86-NEXT: vmovdqa64 %zmm1, %zmm0 -; X86-NEXT: retl +; X86-KNL-LABEL: pr45906: +; X86-KNL: # %bb.0: # %bb +; X86-KNL-NEXT: kxnorw %k0, %k0, %k1 +; X86-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-KNL-NEXT: vpgatherdq 4(,%ymm0), %zmm1 {%k1} +; X86-KNL-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-KNL-NEXT: retl +; +; X64-SKX-LABEL: pr45906: +; X64-SKX: # %bb.0: # %bb +; X64-SKX-NEXT: kxnorb %k0, %k0, %k1 +; X64-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-SKX-NEXT: vpgatherqq 8(,%zmm0), %zmm1 {%k1} +; X64-SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-SKX-NEXT: retq +; +; X86-SKX-LABEL: pr45906: +; X86-SKX: # %bb.0: # %bb +; X86-SKX-NEXT: kxnorb %k0, %k0, %k1 +; X86-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-SKX-NEXT: vpgatherdq 4(,%ymm0), %zmm1 {%k1} +; X86-SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-SKX-NEXT: retl bb: %tmp = getelementptr inbounds %struct.foo, <8 x ptr> 
%ptr, i64 0, i32 1 %tmp1 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %tmp, i32 8, <8 x i1> , <8 x i64> undef) diff --git a/llvm/test/CodeGen/X86/scatter-schedule.ll b/llvm/test/CodeGen/X86/scatter-schedule.ll index 762a050247a87..36bf31395d6d5 100644 --- a/llvm/test/CodeGen/X86/scatter-schedule.ll +++ b/llvm/test/CodeGen/X86/scatter-schedule.ll @@ -9,9 +9,9 @@ target triple = "x86_64-unknown-linux-gnu" define void @test(i64 %x272, <16 x ptr> %x335, <16 x i32> %x270) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k1} -; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kxnorb %k0, %k0, %k1 ; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm0 ; CHECK-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll b/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll index a768baae97add..466fa6ba098b3 100644 --- a/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll +++ b/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll @@ -5890,17 +5890,16 @@ define void @mask_replication_factor6_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out ; AVX512DQ-SLOW: # %bb.0: ; AVX512DQ-SLOW-NEXT: kmovw (%rdi), %k0 ; AVX512DQ-SLOW-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3] -; AVX512DQ-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,1] -; AVX512DQ-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512DQ-SLOW-NEXT: movw $255, %ax -; AVX512DQ-SLOW-NEXT: kmovw %eax, %k1 -; AVX512DQ-SLOW-NEXT: vpcmpgtd %zmm1, %zmm2, %k1 {%k1} ; AVX512DQ-SLOW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2] -; AVX512DQ-SLOW-NEXT: vpermd %zmm0, %zmm1, %zmm0 -; AVX512DQ-SLOW-NEXT: vpmovd2m %zmm0, %k2 -; AVX512DQ-SLOW-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k1} {z} -; AVX512DQ-SLOW-NEXT: vmovdqa32 (%rsi), %zmm1 {%k2} {z} +; AVX512DQ-SLOW-NEXT: vpermd %zmm0, %zmm1, %zmm1 +; AVX512DQ-SLOW-NEXT: vpmovd2m 
%zmm1, %k1 +; AVX512DQ-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX512DQ-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,1] +; AVX512DQ-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512DQ-SLOW-NEXT: kxnorb %k0, %k0, %k2 +; AVX512DQ-SLOW-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k2} +; AVX512DQ-SLOW-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z} +; AVX512DQ-SLOW-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z} ; AVX512DQ-SLOW-NEXT: vmovdqa64 %zmm1, (%rdx) ; AVX512DQ-SLOW-NEXT: vmovdqa %ymm0, 64(%rdx) ; AVX512DQ-SLOW-NEXT: vzeroupper @@ -5910,17 +5909,16 @@ define void @mask_replication_factor6_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out ; AVX512DQ-FAST: # %bb.0: ; AVX512DQ-FAST-NEXT: kmovw (%rdi), %k0 ; AVX512DQ-FAST-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-FAST-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,3,3,3,3,3,3] -; AVX512DQ-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm1 -; AVX512DQ-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512DQ-FAST-NEXT: movw $255, %ax -; AVX512DQ-FAST-NEXT: kmovw %eax, %k1 -; AVX512DQ-FAST-NEXT: vpcmpgtd %zmm1, %zmm2, %k1 {%k1} ; AVX512DQ-FAST-NEXT: vpmovsxbd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2] -; AVX512DQ-FAST-NEXT: vpermd %zmm0, %zmm1, %zmm0 -; AVX512DQ-FAST-NEXT: vpmovd2m %zmm0, %k2 -; AVX512DQ-FAST-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k1} {z} -; AVX512DQ-FAST-NEXT: vmovdqa32 (%rsi), %zmm1 {%k2} {z} +; AVX512DQ-FAST-NEXT: vpermd %zmm0, %zmm1, %zmm1 +; AVX512DQ-FAST-NEXT: vpmovd2m %zmm1, %k1 +; AVX512DQ-FAST-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,3,3,3,3,3,3] +; AVX512DQ-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX512DQ-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512DQ-FAST-NEXT: kxnorb %k0, %k0, %k2 +; AVX512DQ-FAST-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k2} +; AVX512DQ-FAST-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z} +; AVX512DQ-FAST-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z} ; AVX512DQ-FAST-NEXT: vmovdqa64 %zmm1, (%rdx) ; AVX512DQ-FAST-NEXT: vmovdqa %ymm0, 64(%rdx) ; AVX512DQ-FAST-NEXT: vzeroupper diff --git 
a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_ehframe.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_ehframe.s new file mode 100644 index 0000000000000..fca8345ff207c --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_ehframe.s @@ -0,0 +1,58 @@ +# REQUIRES: asserts +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux-gnu -filetype=obj -o %t %s +# RUN: llvm-jitlink -noexec -phony-externals -debug-only=jitlink %t 2>&1 | \ +# RUN: FileCheck %s +# +# Check that splitting of eh-frame sections works. +# +# CHECK: DWARFRecordSectionSplitter: Processing .eh_frame... +# CHECK: Processing block at +# CHECK: Processing CFI record at +# CHECK: Processing CFI record at +# CHECK: EHFrameEdgeFixer: Processing .eh_frame in "{{.*}}"... +# CHECK: Processing block at +# CHECK: Record is CIE +# CHECK: Processing block at +# CHECK: Record is FDE +# CHECK: Adding edge at {{.*}} to CIE at: {{.*}} +# CHECK: Processing PC-begin at +# CHECK: Existing edge at {{.*}} to PC begin at {{.*}} +# CHECK: Adding keep-alive edge from target at {{.*}} to FDE at {{.*}} + + .text + .file "exceptions.cpp" + # Start of file scope inline assembly + .globl _ZSt21ios_base_library_initv + + # End of file scope inline assembly + .globl main # -- Begin function main + .p2align 4 + .type main,@function +main: # @main + .cfi_startproc +# %bb.0: # %entry + stmg %r11, %r15, 88(%r15) + .cfi_offset %r11, -72 + .cfi_offset %r14, -48 + .cfi_offset %r15, -40 + aghi %r15, -168 + .cfi_def_cfa_offset 328 + lgr %r11, %r15 + .cfi_def_cfa_register %r11 + mvhi 164(%r11), 0 + lghi %r2, 4 + brasl %r14, __cxa_allocate_exception@PLT + mvhi 0(%r2), 1 + lgrl %r3, _ZTIi@GOT + lghi %r4, 0 + brasl %r14, __cxa_throw@PLT +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym __cxa_allocate_exception + .addrsig_sym __cxa_throw + .addrsig_sym _ZTIi diff --git 
a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs16.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs16.s new file mode 100644 index 0000000000000..04e828685c040 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs16.s @@ -0,0 +1,35 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec -abs X=0xFFFF -check=%s %t.o + +# RUN: not llvm-jitlink -noexec -abs X=0x10000 %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# Check success and failure cases of R_390_16 handling. + +# jitlink-check: *{8}P = X + +# CHECK-ERROR: relocation target {{.*}} (X) is out of range of Pointer16 fixup + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 +.Lfunc_end0: + .size main, .Lfunc_end0-main + + .type P,@object + .data + .globl P + .p2align 1 +P: + .short 0 + .short 0 + .short 0 + .short X # Using short here generates R_390_16. + .size P, 8 + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs32.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs32.s new file mode 100644 index 0000000000000..1a63acdb63d57 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs32.s @@ -0,0 +1,32 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec -abs X=0x12345678 -check=%s %t.o +# +# RUN: not llvm-jitlink -noexec -abs X=0x123456789 %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# Check success and failure cases of R_390_32 handling.
+ +# jitlink-check: *{8}P = X + +# CHECK-ERROR: relocation target {{.*}} (X) is out of range of Pointer32 fixup + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 +.Lfunc_end0: + .size main, .Lfunc_end0-main + + .type P,@object + .data + .globl P + .p2align 2 +P: + .long 0 + .long X # Using long here generates R_390_32. + .size P, 8 diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs64.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs64.s new file mode 100644 index 0000000000000..63d2a1a539aeb --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs64.s @@ -0,0 +1,28 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec -abs X=0xffffffffffffffff -check=%s %t.o +# +# Check the success case of R_390_64 handling; no RUN line exercises a failure. + +# jitlink-check: *{8}P = X + +# CHECK-ERROR: relocation target "X" {{.*}} is out of range of Pointer64 fixup + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 +.Lfunc_end0: + .size main, .Lfunc_end0-main + + .type P,@object + .data + .globl P + .p2align 4 +P: + .quad X # Using quad here generates R_390_64. + .size P, 8 diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs8.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs8.s new file mode 100644 index 0000000000000..5f23f289140a6 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_abs8.s @@ -0,0 +1,38 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec -abs X=0xFF -check=%s %t.o + +# RUN: not llvm-jitlink -noexec -abs X=0x100 %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# Check success and failure cases of R_390_8 handling.
+ +# jitlink-check: *{8}P = X + +# CHECK-ERROR: relocation target {{.*}} (X) is out of range of Pointer8 fixup + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 +.Lfunc_end0: + .size main, .Lfunc_end0-main + + .type P,@object + .data + .globl P +P: + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .byte X # Using byte here generates R_390_8. + .size P, 8 + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_call_pic.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_call_pic.s new file mode 100644 index 0000000000000..e38a2f39b7c22 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_call_pic.s @@ -0,0 +1,96 @@ +# REQUIRES: system-linux +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t/elf_pic_reloc.o %s +# +# RUN: llvm-jitlink -noexec \ +# RUN: -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \ +# RUN: -abs external_data=0x1 \ +# RUN: -abs extern_out_of_range32=0x7fff00000000 \ +# RUN: -abs extern_in_range32=0xffe00000 \ +# RUN: -check %s %t/elf_pic_reloc.o +# XFAIL: * + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 + .size main, .-main + + .globl named_func + .p2align 4 + .type named_func,@function +named_func: + br %r14 + .size named_func, .-named_func + +# Check R_390_PC32DBL handling with a call to a local function in the text +# section. This produces a Delta32dbl edge that is resolved like a regular +# direct relative branch (no PLT entry created).
+# +# jitlink-check: decode_operand(test_call_local, 1) = \ +# jitlink-check: named_func - test_call_local + .globl test_call_local + .p2align 4 + .type test_call_local,@function +test_call_local: + brasl %r14, named_func@PLT + + .size test_call_local, .-test_call_local + +# Check R_390_PLT32dbl(DeltaPLT32dbl) handling with a call to an +# external via PLT. This produces a Delta32dbl edge, because externals are +# not defined locally. As the target is out-of-range from the callsite, +# the edge keeps using its PLT entry. +# +# jitlink-check: decode_operand(test_call_extern_plt, 1) = \ +# jitlink-check: stub_addr(elf_pic_reloc.o, extern_out_of_range32) - \ +# jitlink-check: test_call_extern_plt +# jitlink-check: *{8}(got_addr(elf_pic_reloc.o, extern_out_of_range32)) = \ +# jitlink-check: extern_out_of_range32 + .globl test_call_extern_plt + .p2align 4 + .type test_call_extern_plt,@function +test_call_extern_plt: + brasl %r14, extern_out_of_range32@plt + + .size test_call_extern_plt, .-test_call_extern_plt + +# Check PLT stub relocation for lgrl(Delta32dbl). +# +# jitlink-check: *{4}(stub_addr(elf_pic_reloc.o, extern_out_of_range32) + 2) = \ +# jitlink-check: (got_addr(elf_pic_reloc.o, extern_out_of_range32) - \ +# jitlink-check: stub_addr(elf_pic_reloc.o, extern_out_of_range32)) >> 1 + .globl test_call_extern_plt_stub + .p2align 4 + .type test_call_extern_plt_stub,@function +test_call_extern_plt_stub: + brasl %r14, extern_out_of_range32@plt + + .size test_call_extern_plt_stub, .-test_call_extern_plt_stub + +# Check R_390_PLT32(DeltaPLT32dbl) handling with a call to an external. +# This produces a Delta32dbl edge, because externals are not defined +# locally. During resolution, the target turns out to be in-range from the +# callsite. 
+### TODO: edge can be relaxed in post-allocation optimization, it will then +### require: +### jitlink-check: decode_operand(test_call_extern, 1) = \ +### jitlink-check: extern_in_range32 - test_call_extern +# +# Same as test_call_extern_plt(no-optimization) +# jitlink-check: decode_operand(test_call_extern, 1) = \ +# jitlink-check: stub_addr(elf_pic_reloc.o, extern_in_range32) - \ +# jitlink-check: test_call_extern +# jitlink-check: *{8}(got_addr(elf_pic_reloc.o, extern_in_range32)) = \ +# jitlink-check: extern_in_range32 + .globl test_call_extern + .p2align 4 + .type test_call_extern,@function +test_call_extern: + brasl %r14, extern_in_range32@plt + .size test_call_extern, .-test_call_extern + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_disp12.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_disp12.s new file mode 100644 index 0000000000000..cf12cdc987ce3 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_disp12.s @@ -0,0 +1,28 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t.o %s +# +# RUN: llvm-jitlink -noexec -abs DISP=0xFFF -check=%s %t.o + +# RUN: not llvm-jitlink -noexec -abs DISP=0x1000 %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# Check success and failure cases of R_390_12 handling. 
+ +# CHECK-ERROR: relocation target {{.*}} (DISP) is out of range of +# CHECK-ERROR: Pointer12 fixup + +# jitlink-check: decode_operand(main, 2) = DISP + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + .reloc .+2, R_390_12, DISP + l %r6, 0(%r7,%r8) + br %r14 +.Lfunc_end0: + .size main, .Lfunc_end0-main + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_disp20.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_disp20.s new file mode 100644 index 0000000000000..5c7de535cf8b4 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_disp20.s @@ -0,0 +1,31 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t.o %s +# +# RUN: llvm-jitlink -noexec -abs DISP=0x7FFFF -check=%s %t.o + +# RUN: not llvm-jitlink -noexec -abs DISP=0x80000 %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s + +# RUN: not llvm-jitlink -noexec -abs DISP=0xFFFFF %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# Check success and failure cases of R_390_20 handling. 
+ +# CHECK-ERROR: relocation target {{.*}} (DISP) is out of range of +# CHECK-ERROR: Pointer20 fixup + +# jitlink-check: decode_operand(main, 2) = DISP + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + .reloc .+2, R_390_20, DISP + lg %r6, 0(%r7,%r8) + br %r14 +.Lfunc_end0: + .size main, .Lfunc_end0-main + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_got.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_got.s new file mode 100644 index 0000000000000..7da48cfa704e2 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_got.s @@ -0,0 +1,244 @@ +# REQUIRES: system-linux +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t/elf_reloc.o %s +# +# RUN: llvm-jitlink -noexec \ +# RUN: -slab-allocate 100Kb -slab-address 0x6ff00000 -slab-page-size 4096 \ +# RUN: -abs foo=0x6ff04040 \ +# RUN: -abs bar=0x6ff04048 \ +# RUN: %t/elf_reloc.o -check %s + +# Verifying GOT related relocations. 
+ + .text + .globl main + .type main,@function +main: +# jitlink-check: decode_operand(main, 1) = _GLOBAL_OFFSET_TABLE_ - main + larl %r12, _GLOBAL_OFFSET_TABLE_ + .globl test_gotent_foo +test_gotent_foo: +# jitlink-check: decode_operand(test_gotent_foo, 1) = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - test_gotent_foo) + .reloc .+2, R_390_GOTENT, foo+2 + larl %r1, 0 + .size test_gotent_foo, .-test_gotent_foo + + .globl test_gotent_bar +test_gotent_bar: +# jitlink-check: decode_operand(test_gotent_bar, 1) = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - test_gotent_bar) + .reloc .+2, R_390_GOTENT, bar+2 + larl %r1, 0 + .size test_gotent_bar, .-test_gotent_bar + + .globl test_gotpltent_foo +test_gotpltent_foo: +# jitlink-check: decode_operand(test_gotpltent_foo, 1) = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - test_gotpltent_foo) + .reloc .+2, R_390_GOTPLTENT, foo+2 + larl %r1, 0 + .size test_gotpltent_foo, .-test_gotpltent_foo + + .globl test_gotpltent_bar +test_gotpltent_bar: +# jitlink-check: decode_operand(test_gotpltent_bar, 1) = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - test_gotpltent_bar) + .reloc .+2, R_390_GOTPLTENT, bar+2 + larl %r1, 0 + .size test_gotpltent_bar, .-test_gotpltent_bar + + .globl test_got12_foo +test_got12_foo: +# jitlink-check: decode_operand(test_got12_foo, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOT12, foo + l %r1, 0(%r12) + .size test_got12_foo, .-test_got12_foo + + .globl test_got12_bar +test_got12_bar: +# jitlink-check: decode_operand(test_got12_bar, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOT12, bar + l %r1, 0(%r12) + .size test_got12_bar, .-test_got12_bar + + .globl test_gotplt12_foo +test_gotplt12_foo: +# jitlink-check: decode_operand(test_gotplt12_foo, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOTPLT12, foo + l %r1, 0(%r12) + .size 
test_gotplt12_foo, .-test_gotplt12_foo + + .globl test_gotplt12_bar +test_gotplt12_bar: +# jitlink-check: decode_operand(test_gotplt12_bar, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOTPLT12, bar + l %r1, 0(%r12) + .size test_gotplt12_bar, .-test_gotplt12_bar + + .globl test_got20_foo +test_got20_foo: +# jitlink-check: decode_operand(test_got20_foo, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOT20, foo + lg %r1, 0(%r12) + .size test_got20_foo, .-test_got20_foo + + .globl test_got20_bar +test_got20_bar: +# jitlink-check: decode_operand(test_got20_bar, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOT20, bar + lg %r1, 0(%r12) + .size test_got20_bar, .-test_got20_bar + + .globl test_gotplt20_foo +test_gotplt20_foo: +# jitlink-check: decode_operand(test_gotplt20_foo, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOTPLT20, foo + lg %r1, 0(%r12) + .size test_gotplt20_foo, .-test_gotplt20_foo + + .globl test_gotplt20_bar +test_gotplt20_bar: +# jitlink-check: decode_operand(test_gotplt20_bar, 2) = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) + .reloc .+2, R_390_GOTPLT20, bar + lg %r1, 0(%r12) + .size test_gotplt20_bar, .-test_gotplt20_bar + br %r14 + .size main, .-main + + .data + .globl test_got16_foo +# jitlink-check: *{2}test_got16_foo = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) +test_got16_foo: + .reloc ., R_390_GOT16, foo + .space 2 + .size test_got16_foo, .-test_got16_foo + + .globl test_got16_bar +# jitlink-check: *{2}test_got16_bar = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) +test_got16_bar: + .reloc ., R_390_GOT16, bar + .space 2 + .size test_got16_bar, .-test_got16_bar + + .globl test_gotplt16_foo +# jitlink-check: *{2}test_gotplt16_foo = \ +# jitlink-check: 
(got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) +test_gotplt16_foo: + .reloc ., R_390_GOTPLT16, foo + .space 2 + .size test_gotplt16_foo, .-test_gotplt16_foo + + .globl test_gotplt16_bar +# jitlink-check: *{2}test_gotplt16_bar = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) +test_gotplt16_bar: + .reloc ., R_390_GOTPLT16, bar + .space 2 + .size test_gotplt16_bar, .-test_gotplt16_bar + + .globl test_got32_foo +# jitlink-check: *{4}test_got32_foo = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) +test_got32_foo: + .reloc ., R_390_GOT32, foo + .space 4 + .size test_got32_foo, .-test_got32_foo + + .globl test_got32_bar +# jitlink-check: *{4}test_got32_bar = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) +test_got32_bar: + .reloc ., R_390_GOT32, bar + .space 4 + .size test_got32_bar, .-test_got32_bar + + .globl test_gotplt32_foo +# jitlink-check: *{4}test_gotplt32_foo = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) +test_gotplt32_foo: + .reloc ., R_390_GOTPLT32, foo + .space 4 + .size test_gotplt32_foo, .-test_gotplt32_foo + + .globl test_gotplt32_bar +# jitlink-check: *{4}test_gotplt32_bar = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) +test_gotplt32_bar: + .reloc ., R_390_GOTPLT32, bar + .space 4 + .size test_gotplt32_bar, .-test_gotplt32_bar + + .globl test_got64_foo +# jitlink-check: *{8}test_got64_foo = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) +test_got64_foo: + .reloc ., R_390_GOT64, foo + .space 8 + .size test_got64_foo, .-test_got64_foo + + .globl test_got64_bar +# jitlink-check: *{8}test_got64_bar = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) +test_got64_bar: + .reloc ., R_390_GOT64, bar + .space 8 + .size test_got64_bar, .-test_got64_bar + + .globl test_gotplt64_foo +# jitlink-check: *{8}test_gotplt64_foo = \ +# jitlink-check: (got_addr(elf_reloc.o, foo) - 
_GLOBAL_OFFSET_TABLE_) +test_gotplt64_foo: + .reloc ., R_390_GOTPLT64, foo + .space 8 + .size test_gotplt64_foo, .-test_gotplt64_foo + + .globl test_gotplt64_bar +# jitlink-check: *{8}test_gotplt64_bar = \ +# jitlink-check: (got_addr(elf_reloc.o, bar) - _GLOBAL_OFFSET_TABLE_) +test_gotplt64_bar: + .reloc ., R_390_GOTPLT64, bar + .space 8 + .size test_gotplt64_bar, .-test_gotplt64_bar + + .globl test_gotpc_foo +# jitlink-check: *{4}test_gotpc_foo = _GLOBAL_OFFSET_TABLE_ - test_gotpc_foo +test_gotpc_foo: + .reloc ., R_390_GOTPC, foo + .space 4 + .size test_gotpc_foo, .-test_gotpc_foo + + .globl test_gotpc_bar +# jitlink-check: *{4}test_gotpc_bar = _GLOBAL_OFFSET_TABLE_ - test_gotpc_bar +test_gotpc_bar: + .reloc ., R_390_GOTPC, bar + .space 4 + .size test_gotpc_bar, .-test_gotpc_bar + + .globl test_gotpcdbl_foo +# jitlink-check: *{4}test_gotpcdbl_foo = \ +# jitlink-check: (_GLOBAL_OFFSET_TABLE_ - test_gotpcdbl_foo) >> 1 +test_gotpcdbl_foo: + .reloc ., R_390_GOTPCDBL, foo + .space 4 + .size test_gotpcdbl_foo, .-test_gotpcdbl_foo + + .globl test_gotpcdbl_bar +# jitlink-check: *{4}test_gotpcdbl_bar = \ +# jitlink-check: (_GLOBAL_OFFSET_TABLE_ - test_gotpcdbl_bar) >> 1 +test_gotpcdbl_bar: + .reloc ., R_390_GOTPCDBL, bar + .space 4 + .size test_gotpcdbl_bar, .-test_gotpcdbl_bar + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_gotrel.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_gotrel.s new file mode 100644 index 0000000000000..af37b3f75ca42 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_gotrel.s @@ -0,0 +1,68 @@ +# REQUIRES: system-linux +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t/elf_reloc.o %s +# +# RUN: llvm-jitlink -noexec \ +# RUN: -slab-allocate 100Kb -slab-address 0x6ff00000 -slab-page-size 4096 \ +# RUN: -abs foo=0x6ff04080 \ +# RUN: -abs bar=0x6ff04040 \ +# RUN: %t/elf_reloc.o -check %s + + .text + 
.globl main + .type main,@function +main: + br %r14 + .size main, .-main + + .data + .globl test_gotoff16_bar +# jitlink-check: *{2}test_gotoff16_bar = (bar - _GLOBAL_OFFSET_TABLE_) & 0xffff +test_gotoff16_bar: + .reloc ., R_390_GOTOFF16, bar + .space 2 + .size test_gotoff16_bar, .-test_gotoff16_bar + + .globl test_pltoff16_foo +# jitlink-check: *{2}test_pltoff16_foo = \ +# jitlink-check: (stub_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) \ +# jitlink-check: & 0xffff +test_pltoff16_foo: + .reloc ., R_390_PLTOFF16, foo + .space 2 + .size test_pltoff16_foo, .-test_pltoff16_foo + + + .globl test_gotoff32_bar +# jitlink-check: *{4}test_gotoff32_bar = (bar - _GLOBAL_OFFSET_TABLE_) \ +# jitlink-check: & 0xffffffff +test_gotoff32_bar: + .reloc ., R_390_GOTOFF, bar + .space 4 + .size test_gotoff32_bar, .-test_gotoff32_bar + + .globl test_pltoff32_foo +# jitlink-check: *{4}test_pltoff32_foo = \ +# jitlink-check: (stub_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) \ +# jitlink-check: & 0xffffffff +test_pltoff32_foo: + .reloc ., R_390_PLTOFF32, foo + .space 4 + .size test_pltoff32_foo, .-test_pltoff32_foo + + .globl test_gotoff64_bar +# jitlink-check: *{8}test_gotoff64_bar = bar - _GLOBAL_OFFSET_TABLE_ +test_gotoff64_bar: + .reloc ., R_390_GOTOFF64, bar + .space 8 + .size test_gotoff64_bar, .-test_gotoff64_bar + + .globl test_pltoff64_foo +# jitlink-check: *{8}test_pltoff64_foo = \ +# jitlink-check: (stub_addr(elf_reloc.o, foo) - _GLOBAL_OFFSET_TABLE_) +test_pltoff64_foo: + .reloc ., R_390_PLTOFF64, foo + .space 8 + .size test_pltoff64_foo, .-test_pltoff64_foo + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc.s new file mode 100644 index 0000000000000..4b3a65e53ab93 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc.s @@ -0,0 +1,20 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: 
-filetype=obj -o %t.o %s +# +# RUN: llvm-jitlink -noexec %t.o +# +# Check R_390_PC* handling. + + .text + .globl main + .type main,@function +main: + br %r14 + .size main, .-main + + .rodata + .short main-. # Generate R_390_PC16 relocation. + .long main-. # Generate R_390_PC32 relocation. + .quad main-. # Generate R_390_PC64 relocation. + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc16.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc16.s new file mode 100644 index 0000000000000..0da54b2a58972 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc16.s @@ -0,0 +1,41 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -defsym OFFSET=0x8000 -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec -abs OFFSET=0x8000 -check=%s %t.o +# +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -defsym OFFSET=0xFFFF -filetype=obj -o %t.o %s +# RUN: not llvm-jitlink -noexec -abs OFFSET=0xFFFF %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -defsym OFFSET=0x8001 -filetype=obj -o %t.o %s +# RUN: not llvm-jitlink -noexec -abs OFFSET=0x8001 %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# jitlink-check: *{2}test_pc16 = OFFSET +# jitlink-check: *{2}test_pc16dbl = OFFSET + +# CHECK-ERROR: {{.*}} is out of range of Delta16 fixup + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 + .size main, .-main + + .globl test_pc16 +test_pc16: + .reloc test_pc16, R_390_PC16, .-OFFSET + .space 2 + .size test_pc16, .-test_pc16 + + .globl test_pc16dbl +test_pc16dbl: + .reloc test_pc16dbl, R_390_PC16DBL, .-(OFFSET + OFFSET) + .space 2 + .size test_pc16dbl, .-test_pc16dbl + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc32.s 
b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc32.s new file mode 100644 index 0000000000000..503fd2d0a5d49 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc32.s @@ -0,0 +1,41 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -defsym OFFSET=0x80000000 -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec -abs OFFSET=0x80000000 -check=%s %t.o +# +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -defsym OFFSET=0xFFFFFFFF -filetype=obj -o %t.o %s +# RUN: not llvm-jitlink -noexec -abs OFFSET=0xFFFFFFFF %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -defsym OFFSET=0x80000001 -filetype=obj -o %t.o %s +# RUN: not llvm-jitlink -noexec -abs OFFSET=0x80000001 %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s +# +# jitlink-check: *{4}test_pc32 = OFFSET +# jitlink-check: *{4}test_pc32dbl = OFFSET + +# CHECK-ERROR: {{.*}} is out of range of Delta32 fixup + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 + .size main, .-main + + .globl test_pc32 +test_pc32: + .reloc test_pc32, R_390_PC32, .-OFFSET + .space 4 + .size test_pc32, .-test_pc32 + + .globl test_pc32dbl +test_pc32dbl: + .reloc test_pc32dbl, R_390_PC32DBL, .-(OFFSET + OFFSET) + .space 4 + .size test_pc32dbl, .-test_pc32dbl + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc64.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc64.s new file mode 100644 index 0000000000000..0d33ae2976de5 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pc64.s @@ -0,0 +1,34 @@ +# REQUIRES: system-linux +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t/elf_reloc.o %s +# +# RUN: llvm-jitlink -noexec \ +# 
RUN: -slab-allocate 100Kb -slab-address 0xffff0000 -slab-page-size 4096 \ +# RUN: -abs external_data=0x1 \ +# RUN: -abs foo=0x6ff04040 \ +# RUN: -abs bar=0x6ff04048 \ +# RUN: -check %s %t/elf_reloc.o + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 + .size main, .-main + + .globl test_pc64_foo +# jitlink-check: *{8}test_pc64_foo = foo - test_pc64_foo +test_pc64_foo: + .reloc ., R_390_PC64, foo + .space 8 + .size test_pc64_foo, .-test_pc64_foo + + .globl test_pc64_bar +# jitlink-check: *{8}test_pc64_bar = bar - test_pc64_bar +test_pc64_bar: + .reloc ., R_390_PC64, bar + .space 8 + .size test_pc64_bar, .-test_pc64_bar diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s new file mode 100644 index 0000000000000..efe8357e76bef --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pcdbl.s @@ -0,0 +1,84 @@ +# REQUIRES: system-linux +# RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ +# RUN: -defsym OFF12=0xffe -defsym OFF16=4 -defsym OFF24=6 \ +# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# +# RUN: llvm-jitlink -noexec -abs OFF12=0xffe -abs OFF16=4 -abs OFF24=6 \ +# RUN: -abs OFF32=6 -check=%s %t.o +# +# RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ +# RUN: -defsym OFF12=6 -defsym OFF16=0xfffe -defsym OFF24=6 \ +# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# +# RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=0xfffe -abs OFF24=6 \ +# RUN: -abs OFF32=6 -check=%s %t.o +# +# RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 -position-independent \ +# RUN: -defsym OFF12=6 -defsym OFF16=4 -defsym OFF24=0xfffffe \ +# RUN: -defsym OFF32=6 -filetype=obj -o %t.o %s +# +# RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=4 -abs OFF24=0xfffffe \ +# RUN: -abs OFF32=6 -check=%s %t.o +# +# RUN: llvm-mc -triple=systemz-unknown-linux -mcpu=z16 
-position-independent \ +# RUN: -defsym OFF12=6 -defsym OFF16=4 -defsym OFF24=6 \ +# RUN: -defsym OFF32=0xffffffc8 -filetype=obj -o %t.o %s +# +# RUN: llvm-jitlink -noexec -abs OFF12=6 -abs OFF16=4 -abs OFF24=6 \ +# RUN: -abs OFF32=0xffffffc8 -check=%s %t.o + +# Check R_390_PC*dbl relocations. + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 + .size main, .-main + +# R_390_PC16DBL +# jitlink-check: *{2}(test_pc16dbl + 2) = (OFF16 >> 1) + .globl test_pc16dbl + .p2align 3 +test_pc16dbl: + je .Lpc16dbl + .space OFF16 - 4 +.Lpc16dbl: + jne test_pc16dbl + .size test_pc16dbl,.-test_pc16dbl + +# R_390_PC32DBL +# jitlink-check: *{4}(test_pc32dbl + 2) = (OFF32 >> 1) + .globl test_pc32dbl + .p2align 3 +test_pc32dbl: + jge .Lpc32dbl + .space OFF32 - 6 +.Lpc32dbl: + jgne test_pc32dbl + .size test_pc32dbl,.-test_pc32dbl + +# R_390_PC12DBL +# jitlink-check: ((*{2} (test_pc12dbl + 1)) & 0x0fff) = (OFF12 >> 1) + .globl test_pc12dbl + .p2align 4 +test_pc12dbl: + bprp 0, .Lpc12dbl, 0 + .space OFF12 - 6 +.Lpc12dbl: + bprp 0, test_pc12dbl, 0 + .size test_pc12dbl,.-test_pc12dbl + +# R_390_PC24DBL +# jitlink-check: ((*{4} (test_pc24dbl + 2)) & 0x0ffffff) = (OFF24 >> 1) + .globl test_pc24dbl + .p2align 4 +test_pc24dbl: + bprp 0, 0, .Lpc24dbl + .space OFF24 - 6 +.Lpc24dbl: + bprp 0, 0, test_pc24dbl + .size test_pc24dbl,.-test_pc24dbl + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_plt.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_plt.s new file mode 100644 index 0000000000000..47f064b45816a --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_plt.s @@ -0,0 +1,71 @@ +# REQUIRES: system-linux +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -filetype=obj -o %t/elf_reloc.o %s +# +# RUN: llvm-jitlink -noexec \ +# RUN: -slab-allocate 100Kb -slab-address 0xffff0000 -slab-page-size 4096 \ +# RUN: -abs 
external_data=0x1 \ +# RUN: -abs foo=0x6ff04040 \ +# RUN: -abs bar=0x6ff04048 \ +# RUN: -check %s %t/elf_reloc.o + +# Check R_390_PLT32/64 relocations. + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 + .size main, .-main + + .globl test_plt32_foo +# jitlink-check: *{4}test_plt32_foo = \ +# jitlink-check: stub_addr(elf_reloc.o, foo) - test_plt32_foo +test_plt32_foo: + .reloc ., R_390_PLT32, foo + .space 4 + .size test_plt32_foo, .-test_plt32_foo + + .globl test_plt32_bar +# jitlink-check: *{4}test_plt32_bar = \ +# jitlink-check: stub_addr(elf_reloc.o, bar) - test_plt32_bar +test_plt32_bar: + .reloc ., R_390_PLT32, bar + .space 4 + .size test_plt32_bar, .-test_plt32_bar + + .globl test_plt64_foo +# jitlink-check: *{8}test_plt64_foo = \ +# jitlink-check: stub_addr(elf_reloc.o, foo) - test_plt64_foo +test_plt64_foo: + .reloc ., R_390_PLT64, foo + .space 8 + .size test_plt64_foo, .-test_plt64_foo + + .globl test_plt64_bar +# jitlink-check: *{8}test_plt64_bar = \ +# jitlink-check: stub_addr(elf_reloc.o, bar) - test_plt64_bar +test_plt64_bar: + .reloc ., R_390_PLT64, bar + .space 8 + .size test_plt64_bar, .-test_plt64_bar + + .globl test_plt32dbl_foo +# jitlink-check: *{4}test_plt32dbl_foo = \ +# jitlink-check: (stub_addr(elf_reloc.o, foo) - test_plt32dbl_foo) >> 1 +test_plt32dbl_foo: + .reloc ., R_390_PLT32DBL, foo + .space 4 + .size test_plt32dbl_foo, .-test_plt32dbl_foo + + .globl test_plt32dbl_bar +# jitlink-check: *{4}test_plt32dbl_bar = \ +# jitlink-check: (stub_addr(elf_reloc.o, bar) - test_plt32dbl_bar) >> 1 +test_plt32dbl_bar: + .reloc ., R_390_PLT32DBL, bar + .space 4 + .size test_plt32dbl_bar, .-test_plt32dbl_bar + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pltdbl.s b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pltdbl.s new file mode 100644 index 0000000000000..c36a77684008a --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/ELF_systemz_reloc_pltdbl.s @@ 
-0,0 +1,51 @@ +# REQUIRES: system-linux +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=systemz-unknown-linux -position-independent \ +# RUN: -mcpu=z16 -filetype=obj -o %t/elf_reloc.o %s + +# RUN: llvm-jitlink -noexec \ +# RUN: -abs external_addr12=0xffe \ +# RUN: -abs external_addr16=0xfffe \ +# RUN: -abs external_addr24=0xffffe \ +# RUN: %t/elf_reloc.o -check %s + + + .text + .section .text.main + .globl main + .p2align 4 + .type main,@function +main: + br %r14 + .size main, .-main + +# R_390_PLT16DBL +# jitlink-check: *{2}(test_plt16dbl + 4) = \ +# jitlink-check: (stub_addr(elf_reloc.o, external_addr16) - \ +# jitlink-check: test_plt16dbl) >> 1 + .globl test_plt16dbl + .p2align 4 +test_plt16dbl: + bpp 0, external_addr16@plt, 0 + .size test_plt16dbl,.-test_plt16dbl + +# R_390_PLT12DBL +# jitlink-check: ((*{2}(test_plt12dbl + 1)) & 0x0fff) = \ +# jitlink-check: (stub_addr(elf_reloc.o, external_addr12) - \ +# jitlink-check: test_plt12dbl) >> 1 + .globl test_plt12dbl + .p2align 4 +test_plt12dbl: + bprp 0, external_addr12@plt, 0 + .size test_plt12dbl,.-test_plt12dbl + +# R_390_PLT24DBL +# jitlink-check: ((*{4}(test_plt24dbl + 2)) & 0x0ffffff) = \ +# jitlink-check: (stub_addr(elf_reloc.o, external_addr24) - \ +# jitlink-check: test_plt24dbl) >> 1 + .globl test_plt24dbl + .p2align 4 +test_plt24dbl: + bprp 0, 0, external_addr24@plt + .size test_plt24dbl,.-test_plt24dbl + diff --git a/llvm/test/ExecutionEngine/JITLink/systemz/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/systemz/lit.local.cfg new file mode 100644 index 0000000000000..caf81b69c06fd --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/systemz/lit.local.cfg @@ -0,0 +1,2 @@ +if not "SystemZ" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/ExecutionEngine/lit.local.cfg b/llvm/test/ExecutionEngine/lit.local.cfg index 1951f140ea889..bbffee852e10e 100644 --- a/llvm/test/ExecutionEngine/lit.local.cfg +++ b/llvm/test/ExecutionEngine/lit.local.cfg @@ -1,4 +1,4 @@ -if 
config.root.native_target in ['Sparc', 'SystemZ', 'Hexagon']: +if config.root.native_target in ['Sparc', 'Hexagon']: config.unsupported = True # ExecutionEngine tests are not expected to pass in a cross-compilation setup. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index 877484f5159fd..212a5c99676f4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -63,7 +63,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK-NEXT: store i32 [[STORE]], ptr [[NBRBOXES]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -114,7 +114,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; RVA23: middle.block: ; RVA23-NEXT: br label [[LOOP:%.*]] ; RVA23: exit: @@ -141,7 +141,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 
+; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; RVA23ZVL1024B: middle.block: ; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]] ; RVA23ZVL1024B: exit: @@ -185,16 +185,16 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[TMP5]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0( zeroinitializer, align 1 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: diff --git 
a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index c37bf74f9c1b0..d08ca8c99e8ba 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -8,14 +8,14 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 +; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -55,11 +55,11 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], 
i64 [[N]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 +; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/tools/llvm-diff/lib/DifferenceEngine.h b/llvm/tools/llvm-diff/lib/DifferenceEngine.h index b829b2cd0bcbc..01fd0d9540dc2 100644 --- a/llvm/tools/llvm-diff/lib/DifferenceEngine.h +++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.h @@ -17,7 +17,6 @@ #include "DiffConsumer.h" #include "DiffLog.h" #include "llvm/ADT/StringRef.h" -#include namespace llvm { class Function; diff --git a/llvm/tools/llvm-diff/llvm-diff.cpp b/llvm/tools/llvm-diff/llvm-diff.cpp index 2126b91f75ae1..45b8ed91ce52c 100644 --- a/llvm/tools/llvm-diff/llvm-diff.cpp +++ b/llvm/tools/llvm-diff/llvm-diff.cpp @@ -20,11 +20,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" #include -#include - using namespace llvm; diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp index 66c770d9ca86b..afc20bed25914 100644 --- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ 
b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -8,7 +8,6 @@ #include "MCInstrDescView.h" -#include #include #include "llvm/ADT/STLExtras.h" diff --git a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp index b347cfdfc3923..902a1d79106f0 100644 --- a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp +++ b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp @@ -32,8 +32,6 @@ #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include - using namespace llvm; using namespace llvm::pdb; diff --git a/llvm/tools/llvm-profgen/MissingFrameInferrer.cpp b/llvm/tools/llvm-profgen/MissingFrameInferrer.cpp index 7ebca23ba7956..d692de75cfb89 100644 --- a/llvm/tools/llvm-profgen/MissingFrameInferrer.cpp +++ b/llvm/tools/llvm-profgen/MissingFrameInferrer.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/Statistic.h" #include #include -#include #include #include diff --git a/llvm/unittests/ADT/CombinationGeneratorTest.cpp b/llvm/unittests/ADT/CombinationGeneratorTest.cpp index 219e18bc5e12c..cf187e68479c0 100644 --- a/llvm/unittests/ADT/CombinationGeneratorTest.cpp +++ b/llvm/unittests/ADT/CombinationGeneratorTest.cpp @@ -13,7 +13,6 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include -#include #include using namespace llvm; diff --git a/llvm/unittests/ADT/FunctionExtrasTest.cpp b/llvm/unittests/ADT/FunctionExtrasTest.cpp index 9809a92daac72..fdabdca269da2 100644 --- a/llvm/unittests/ADT/FunctionExtrasTest.cpp +++ b/llvm/unittests/ADT/FunctionExtrasTest.cpp @@ -11,7 +11,6 @@ #include "gtest/gtest.h" #include -#include using namespace llvm; diff --git a/llvm/unittests/ADT/IntervalMapTest.cpp b/llvm/unittests/ADT/IntervalMapTest.cpp index 38f397ff2eb54..06848e218da10 100644 --- a/llvm/unittests/ADT/IntervalMapTest.cpp +++ b/llvm/unittests/ADT/IntervalMapTest.cpp @@ -8,7 +8,6 @@ #include "llvm/ADT/IntervalMap.h" #include "gtest/gtest.h" -#include using namespace llvm; diff --git 
a/llvm/unittests/Support/HashBuilderTest.cpp b/llvm/unittests/Support/HashBuilderTest.cpp index 0aacfcd185ba0..70cb92b21848c 100644 --- a/llvm/unittests/Support/HashBuilderTest.cpp +++ b/llvm/unittests/Support/HashBuilderTest.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include diff --git a/llvm/unittests/Support/NativeFormatTests.cpp b/llvm/unittests/Support/NativeFormatTests.cpp index ac04c5a53d74a..974709efce9df 100644 --- a/llvm/unittests/Support/NativeFormatTests.cpp +++ b/llvm/unittests/Support/NativeFormatTests.cpp @@ -10,8 +10,6 @@ #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" -#include - using namespace llvm; namespace { diff --git a/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp b/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp index da713007f662d..321c092bc7518 100644 --- a/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp +++ b/llvm/unittests/Target/SPIRV/SPIRVAPITest.cpp @@ -20,7 +20,6 @@ #include "gtest/gtest.h" #include #include -#include namespace llvm { diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.cpp b/llvm/utils/TableGen/Common/CodeGenTarget.cpp index 1e9378845854e..c0daac127f71a 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/Common/CodeGenTarget.cpp @@ -23,7 +23,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" -#include #include using namespace llvm; diff --git a/llvm/utils/TableGen/Common/InfoByHwMode.h b/llvm/utils/TableGen/Common/InfoByHwMode.h index ef688a6f6b3d1..ce84960ef79a7 100644 --- a/llvm/utils/TableGen/Common/InfoByHwMode.h +++ b/llvm/utils/TableGen/Common/InfoByHwMode.h @@ -24,7 +24,6 @@ #include #include #include -#include namespace llvm { diff --git a/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h b/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h new file mode 100644 index 0000000000000..64a42a228199e --- /dev/null +++ 
b/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h @@ -0,0 +1,21 @@ +//===- ArithToAPFloat.h - Arith to APFloat impl conversion ---*- C++ ----*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_ARITHTOAPFLOAT_ARITHTOAPFLOAT_H +#define MLIR_CONVERSION_ARITHTOAPFLOAT_ARITHTOAPFLOAT_H + +#include + +namespace mlir { +class Pass; + +#define GEN_PASS_DECL_ARITHTOAPFLOATCONVERSIONPASS +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +#endif // MLIR_CONVERSION_ARITHTOAPFLOAT_ARITHTOAPFLOAT_H diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index 40d866ec7bf10..82bdfd02661a6 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -12,6 +12,7 @@ #include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h" #include "mlir/Conversion/AffineToStandard/AffineToStandard.h" #include "mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h" +#include "mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h" #include "mlir/Conversion/ArithToArmSME/ArithToArmSME.h" #include "mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index e0cac8b699c30..d5665b439b059 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -186,6 +186,21 @@ def ArithToLLVMConversionPass : Pass<"convert-arith-to-llvm"> { ]; } +//===----------------------------------------------------------------------===// +// ArithToAPFloat +//===----------------------------------------------------------------------===// + +def ArithToAPFloatConversionPass + :
Pass<"convert-arith-to-apfloat", "ModuleOp"> { + let summary = "Convert Arith ops to APFloat runtime library calls"; + let description = [{ + This pass converts supported Arith ops to APFloat-based runtime library + calls (APFloatWrappers.cpp). APFloat is a software implementation of + floating-point arithmetic operations. + }]; + let dependentDialects = ["func::FuncDialect"]; +} + //===----------------------------------------------------------------------===// // ArithToSPIRV //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Func/Utils/Utils.h b/mlir/include/mlir/Dialect/Func/Utils/Utils.h index 3576126a487ac..00d50874a2e8d 100644 --- a/mlir/include/mlir/Dialect/Func/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Func/Utils/Utils.h @@ -60,6 +60,13 @@ mlir::FailureOr> deduplicateArgsOfFuncOp(mlir::RewriterBase &rewriter, mlir::func::FuncOp funcOp, mlir::ModuleOp moduleOp); +/// Look up a FuncOp with signature `resultTypes(paramTypes)` and name +/// `name`. Return a failure if the FuncOp is found but with a different +/// signature. +FailureOr lookupFnDecl(SymbolOpInterface symTable, StringRef name, + FunctionType funcT, + SymbolTableCollection *symbolTables = nullptr); + } // namespace func } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h index 8ad9ed18acebd..b09d32022e348 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h +++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h @@ -52,6 +52,10 @@ lookupOrCreatePrintF32Fn(OpBuilder &b, Operation *moduleOp, FailureOr lookupOrCreatePrintF64Fn(OpBuilder &b, Operation *moduleOp, SymbolTableCollection *symbolTables = nullptr); +FailureOr +lookupOrCreateApFloatPrintFn(OpBuilder &b, Operation *moduleOp, + SymbolTableCollection *symbolTables = nullptr); + /// Declares a function to print a C-string.
/// If a custom runtime function is defined via `runtimeFunctionName`, it must /// have the signature void(char const*). The default function is `printString`. diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td index 238fa42cae427..784bdd8e22f1f 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td @@ -109,7 +109,7 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [ within [0, n). - The tiled dimensions (of size `inner_tiles`) are added to the end of the result tensor in the order in which they appear, i.e. - `shape(result)[rank(result) + i] = inner_tiles[i]` for `0 <= i < k`. + `shape(result)[rank(source) + i] = inner_tiles[i]` for `0 <= i < k`. - The following relationship for the tiled dimensions holds: `shape(result)[inner_dims_pos[i]] = shape(source)[inner_dims_pos[i]] / inner_tiles[i]`, where (⌈/⌉ indicates CeilDiv). diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index d90f27bd037e6..40a466beee159 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -204,8 +204,8 @@ namespace { class PyRegionIterator { public: - PyRegionIterator(PyOperationRef operation) - : operation(std::move(operation)) {} + PyRegionIterator(PyOperationRef operation, int nextIndex) + : operation(std::move(operation)), nextIndex(nextIndex) {} PyRegionIterator &dunderIter() { return *this; } @@ -228,7 +228,7 @@ class PyRegionIterator { private: PyOperationRef operation; - int nextIndex = 0; + intptr_t nextIndex = 0; }; /// Regions of an op are fixed length and indexed numerically so are represented @@ -247,7 +247,7 @@ class PyRegionList : public Sliceable { PyRegionIterator dunderIter() { operation->checkValid(); - return PyRegionIterator(operation); + return PyRegionIterator(operation, startIndex); } static void bindDerived(ClassTy &c) { diff --git 
a/mlir/lib/Bindings/Python/NanobindUtils.h b/mlir/lib/Bindings/Python/NanobindUtils.h index 64ea4329f65f1..658e8ad5330ef 100644 --- a/mlir/lib/Bindings/Python/NanobindUtils.h +++ b/mlir/lib/Bindings/Python/NanobindUtils.h @@ -395,7 +395,6 @@ class Sliceable { /// Hook for derived classes willing to bind more methods. static void bindDerived(ClassTy &) {} -private: intptr_t startIndex; intptr_t length; intptr_t step; diff --git a/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp b/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp new file mode 100644 index 0000000000000..01fd5b278aca4 --- /dev/null +++ b/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp @@ -0,0 +1,158 @@ +//===- ArithToAPFloat.cpp - Arithmetic to APFloat Conversion --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h" + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Transforms/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Func/Utils/Utils.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Verifier.h" +#include "mlir/Transforms/WalkPatternRewriteDriver.h" + +namespace mlir { +#define GEN_PASS_DEF_ARITHTOAPFLOATCONVERSIONPASS +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +using namespace mlir; +using namespace mlir::func; + +static FuncOp createFnDecl(OpBuilder &b, SymbolOpInterface symTable, + StringRef name, FunctionType funcT, bool setPrivate, + SymbolTableCollection *symbolTables = nullptr) { + OpBuilder::InsertionGuard g(b); + assert(!symTable->getRegion(0).empty() && "expected non-empty region"); + b.setInsertionPointToStart(&symTable->getRegion(0).front()); + FuncOp funcOp = 
FuncOp::create(b, symTable->getLoc(), name, funcT); + if (setPrivate) + funcOp.setPrivate(); + if (symbolTables) { + SymbolTable &symbolTable = symbolTables->getSymbolTable(symTable); + symbolTable.insert(funcOp, symTable->getRegion(0).front().begin()); + } + return funcOp; +} + +/// Helper function to look up or create the symbol for a runtime library +/// function for a binary arithmetic operation. +/// +/// Parameter 1: APFloat semantics +/// Parameter 2: Left-hand side operand +/// Parameter 3: Right-hand side operand +/// +/// This function will return a failure if the function is found but has an +/// unexpected signature. +/// +static FailureOr +lookupOrCreateBinaryFn(OpBuilder &b, SymbolOpInterface symTable, StringRef name, + SymbolTableCollection *symbolTables = nullptr) { + auto i32Type = IntegerType::get(symTable->getContext(), 32); + auto i64Type = IntegerType::get(symTable->getContext(), 64); + + std::string funcName = (llvm::Twine("_mlir_apfloat_") + name).str(); + FunctionType funcT = + FunctionType::get(b.getContext(), {i32Type, i64Type, i64Type}, {i64Type}); + FailureOr func = + lookupFnDecl(symTable, funcName, funcT, symbolTables); + // Failed due to type mismatch. + if (failed(func)) + return func; + // Successfully matched existing decl. + if (*func) + return *func; + + return createFnDecl(b, symTable, funcName, funcT, + /*setPrivate=*/true, symbolTables); +} + +/// Rewrite a binary arithmetic operation to an APFloat function call. +template +struct BinaryArithOpToAPFloatConversion final : OpRewritePattern { + BinaryArithOpToAPFloatConversion(MLIRContext *context, PatternBenefit benefit, + SymbolOpInterface symTable) + : OpRewritePattern(context, benefit), symTable(symTable) {}; + + LogicalResult matchAndRewrite(OpTy op, + PatternRewriter &rewriter) const override { + // Get APFloat function from runtime library. 
+ FailureOr fn = + lookupOrCreateBinaryFn(rewriter, symTable, APFloatName); + if (failed(fn)) + return fn; + + rewriter.setInsertionPoint(op); + // Cast operands to 64-bit integers. + Location loc = op.getLoc(); + auto floatTy = cast(op.getType()); + auto intWType = rewriter.getIntegerType(floatTy.getWidth()); + auto int64Type = rewriter.getI64Type(); + Value lhsBits = arith::ExtUIOp::create( + rewriter, loc, int64Type, + arith::BitcastOp::create(rewriter, loc, intWType, op.getLhs())); + Value rhsBits = arith::ExtUIOp::create( + rewriter, loc, int64Type, + arith::BitcastOp::create(rewriter, loc, intWType, op.getRhs())); + + // Call APFloat function. + int32_t sem = + llvm::APFloatBase::SemanticsToEnum(floatTy.getFloatSemantics()); + Value semValue = arith::ConstantOp::create( + rewriter, loc, rewriter.getI32Type(), + rewriter.getIntegerAttr(rewriter.getI32Type(), sem)); + SmallVector params = {semValue, lhsBits, rhsBits}; + auto resultOp = + func::CallOp::create(rewriter, loc, TypeRange(rewriter.getI64Type()), + SymbolRefAttr::get(*fn), params); + + // Truncate result to the original width. 
+ Value truncatedBits = arith::TruncIOp::create(rewriter, loc, intWType, + resultOp->getResult(0)); + rewriter.replaceOp( + op, arith::BitcastOp::create(rewriter, loc, floatTy, truncatedBits)); + return success(); + } + + SymbolOpInterface symTable; +}; + +namespace { +struct ArithToAPFloatConversionPass final + : impl::ArithToAPFloatConversionPassBase { + using Base::Base; + + void runOnOperation() override { + MLIRContext *context = &getContext(); + RewritePatternSet patterns(context); + static const char add[] = "add"; + static const char subtract[] = "subtract"; + static const char multiply[] = "multiply"; + static const char divide[] = "divide"; + static const char remainder[] = "remainder"; + patterns.add, + BinaryArithOpToAPFloatConversion, + BinaryArithOpToAPFloatConversion, + BinaryArithOpToAPFloatConversion, + BinaryArithOpToAPFloatConversion>( + context, 1, getOperation()); + LogicalResult result = success(); + ScopedDiagnosticHandler scopedHandler(context, [&result](Diagnostic &diag) { + if (diag.getSeverity() == DiagnosticSeverity::Error) { + result = failure(); + } + // NB: if you don't return failure, no other diag handlers will fire (see + // mlir/lib/IR/Diagnostics.cpp:DiagnosticEngineImpl::emit). 
+ return failure(); + }); + walkAndApplyPatterns(getOperation(), std::move(patterns)); + if (failed(result)) + return signalPassFailure(); + } +}; +} // namespace diff --git a/mlir/lib/Conversion/ArithToAPFloat/CMakeLists.txt b/mlir/lib/Conversion/ArithToAPFloat/CMakeLists.txt new file mode 100644 index 0000000000000..b5ec49c087163 --- /dev/null +++ b/mlir/lib/Conversion/ArithToAPFloat/CMakeLists.txt @@ -0,0 +1,18 @@ +add_mlir_conversion_library(MLIRArithToAPFloat + ArithToAPFloat.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ArithToLLVM + + DEPENDS + MLIRConversionPassIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRArithDialect + MLIRArithTransforms + MLIRFuncDialect + MLIRFuncUtils + ) diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index b6099902cc337..f2bacc3399144 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -14,6 +14,7 @@ #include "mlir/Conversion/LLVMCommon/VectorPattern.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" +#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/TypeUtilities.h" diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index bebf1b8fff3f9..613dc6d242ceb 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(AffineToStandard) add_subdirectory(AMDGPUToROCDL) add_subdirectory(ArithCommon) add_subdirectory(ArithToAMDGPU) +add_subdirectory(ArithToAPFloat) add_subdirectory(ArithToArmSME) add_subdirectory(ArithToEmitC) add_subdirectory(ArithToLLVM) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 69a317ecd101f..c747e1b59558a 100644 --- 
a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -1654,6 +1654,20 @@ class VectorPrintOpConversion : public ConvertOpToLLVMPattern { return failure(); } } + } else if (auto floatTy = dyn_cast(printType)) { + // Print other floating-point types using the APFloat runtime library. + int32_t sem = + llvm::APFloatBase::SemanticsToEnum(floatTy.getFloatSemantics()); + Value semValue = LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI32Type(), + rewriter.getIntegerAttr(rewriter.getI32Type(), sem)); + Value floatBits = + LLVM::ZExtOp::create(rewriter, loc, rewriter.getI64Type(), value); + printer = + LLVM::lookupOrCreateApFloatPrintFn(rewriter, parent, symbolTables); + emitCall(rewriter, loc, printer.value(), + ValueRange({semValue, floatBits})); + return success(); } else { return failure(); } diff --git a/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp index adeb50b6da628..c4e81e5dbed21 100644 --- a/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp @@ -35,7 +35,7 @@ static Value createConst(Location loc, Type type, int value, } /// Create a float constant. 
-static Value createFloatConst(Location loc, Type type, APFloat value, +static Value createFloatConst(Location loc, Type type, const APFloat &value, PatternRewriter &rewriter) { auto attr = rewriter.getFloatAttr(getElementTypeOrSelf(type), value); if (auto shapedTy = dyn_cast(type)) { diff --git a/mlir/lib/Dialect/Func/Utils/Utils.cpp b/mlir/lib/Dialect/Func/Utils/Utils.cpp index b4cb0932ef631..d6dfd0229963c 100644 --- a/mlir/lib/Dialect/Func/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Func/Utils/Utils.cpp @@ -254,3 +254,28 @@ func::deduplicateArgsOfFuncOp(RewriterBase &rewriter, func::FuncOp funcOp, return std::make_pair(*newFuncOpOrFailure, newCallOp); } + +FailureOr +func::lookupFnDecl(SymbolOpInterface symTable, StringRef name, + FunctionType funcT, SymbolTableCollection *symbolTables) { + FuncOp func; + if (symbolTables) { + func = symbolTables->lookupSymbolIn( + symTable, StringAttr::get(symTable->getContext(), name)); + } else { + func = llvm::dyn_cast_or_null( + SymbolTable::lookupSymbolIn(symTable, name)); + } + + if (!func) + return func; + + mlir::FunctionType foundFuncT = func.getFunctionType(); + // Assert the signature of the found function is same as expected + if (funcT != foundFuncT) { + return func.emitError("matched function '") + << name << "' but with different type: " << foundFuncT + << " (expected " << funcT << ")"; + } + return func; +} diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp index feaffa34897b6..160b6ae89215c 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp @@ -30,6 +30,7 @@ static constexpr llvm::StringRef kPrintF16 = "printF16"; static constexpr llvm::StringRef kPrintBF16 = "printBF16"; static constexpr llvm::StringRef kPrintF32 = "printF32"; static constexpr llvm::StringRef kPrintF64 = "printF64"; +static constexpr llvm::StringRef kPrintApFloat = "printApFloat"; static constexpr llvm::StringRef 
kPrintString = "printString"; static constexpr llvm::StringRef kPrintOpen = "printOpen"; static constexpr llvm::StringRef kPrintClose = "printClose"; @@ -160,6 +161,16 @@ mlir::LLVM::lookupOrCreatePrintF64Fn(OpBuilder &b, Operation *moduleOp, LLVM::LLVMVoidType::get(moduleOp->getContext()), symbolTables); } +FailureOr +mlir::LLVM::lookupOrCreateApFloatPrintFn(OpBuilder &b, Operation *moduleOp, + SymbolTableCollection *symbolTables) { + return lookupOrCreateReservedFn( + b, moduleOp, kPrintApFloat, + {IntegerType::get(moduleOp->getContext(), 32), + IntegerType::get(moduleOp->getContext(), 64)}, + LLVM::LLVMVoidType::get(moduleOp->getContext()), symbolTables); +} + static LLVM::LLVMPointerType getCharPtr(MLIRContext *context) { return LLVM::LLVMPointerType::get(context); } diff --git a/mlir/lib/ExecutionEngine/APFloatWrappers.cpp b/mlir/lib/ExecutionEngine/APFloatWrappers.cpp new file mode 100644 index 0000000000000..0a05f7369e556 --- /dev/null +++ b/mlir/lib/ExecutionEngine/APFloatWrappers.cpp @@ -0,0 +1,89 @@ +//===- APFloatWrappers.cpp - Software Implementation of FP Arithmetics --- ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes the APFloat infrastructure to MLIR programs as a runtime +// library. APFloat is a software implementation of floating point arithmetics. +// +// On the MLIR side, floating-point values must be bitcasted to 64-bit integers +// before calling a runtime function. If a floating-point type has less than +// 64 bits, it must be zero-extended to 64 bits after bitcasting it to an +// integer. 
+// +// Runtime functions receive the floating-point operands of the arithmetic +// operation in the form of 64-bit integers, along with the APFloat semantics +// in the form of a 32-bit integer, which will be interpreted as an +// APFloatBase::Semantics enum value. +// +#include "llvm/ADT/APFloat.h" + +#ifdef _WIN32 +#ifndef MLIR_APFLOAT_WRAPPERS_EXPORT +#ifdef mlir_apfloat_wrappers_EXPORTS +// We are building this library +#define MLIR_APFLOAT_WRAPPERS_EXPORT __declspec(dllexport) +#else +// We are using this library +#define MLIR_APFLOAT_WRAPPERS_EXPORT __declspec(dllimport) +#endif // mlir_apfloat_wrappers_EXPORTS +#endif // MLIR_APFLOAT_WRAPPERS_EXPORT +#else +// Non-windows: use visibility attributes. +#define MLIR_APFLOAT_WRAPPERS_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 + +/// Binary operations without rounding mode. +#define APFLOAT_BINARY_OP(OP) \ + MLIR_APFLOAT_WRAPPERS_EXPORT int64_t _mlir_apfloat_##OP( \ + int32_t semantics, uint64_t a, uint64_t b) { \ + const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics( \ + static_cast(semantics)); \ + unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem); \ + llvm::APFloat lhs(sem, llvm::APInt(bitWidth, a)); \ + llvm::APFloat rhs(sem, llvm::APInt(bitWidth, b)); \ + lhs.OP(rhs); \ + return lhs.bitcastToAPInt().getZExtValue(); \ + } + +/// Binary operations with rounding mode.
+#define APFLOAT_BINARY_OP_ROUNDING_MODE(OP, ROUNDING_MODE) \ + MLIR_APFLOAT_WRAPPERS_EXPORT int64_t _mlir_apfloat_##OP( \ + int32_t semantics, uint64_t a, uint64_t b) { \ + const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics( \ + static_cast(semantics)); \ + unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem); \ + llvm::APFloat lhs(sem, llvm::APInt(bitWidth, a)); \ + llvm::APFloat rhs(sem, llvm::APInt(bitWidth, b)); \ + lhs.OP(rhs, ROUNDING_MODE); \ + return lhs.bitcastToAPInt().getZExtValue(); \ + } + +extern "C" { + +#define BIN_OPS_WITH_ROUNDING(X) \ + X(add, llvm::RoundingMode::NearestTiesToEven) \ + X(subtract, llvm::RoundingMode::NearestTiesToEven) \ + X(multiply, llvm::RoundingMode::NearestTiesToEven) \ + X(divide, llvm::RoundingMode::NearestTiesToEven) + +BIN_OPS_WITH_ROUNDING(APFLOAT_BINARY_OP_ROUNDING_MODE) +#undef BIN_OPS_WITH_ROUNDING +#undef APFLOAT_BINARY_OP_ROUNDING_MODE + +APFLOAT_BINARY_OP(remainder) + +#undef APFLOAT_BINARY_OP + +MLIR_APFLOAT_WRAPPERS_EXPORT void printApFloat(int32_t semantics, uint64_t a) { + const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics( + static_cast(semantics)); + unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem); + llvm::APFloat x(sem, llvm::APInt(bitWidth, a)); + double d = x.convertToDouble(); + fprintf(stdout, "%lg", d); +} +} diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index fdeb4dacf9278..8c09e50e4de7b 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -2,6 +2,7 @@ # is a big dependency which most don't need. 
set(LLVM_OPTIONAL_SOURCES + APFloatWrappers.cpp ArmRunnerUtils.cpp ArmSMEStubs.cpp AsyncRuntime.cpp @@ -167,6 +168,15 @@ if(LLVM_ENABLE_PIC) set_property(TARGET mlir_float16_utils PROPERTY CXX_STANDARD 17) target_compile_definitions(mlir_float16_utils PRIVATE mlir_float16_utils_EXPORTS) + add_mlir_library(mlir_apfloat_wrappers + SHARED + APFloatWrappers.cpp + + EXCLUDE_FROM_LIBMLIR + ) + set_property(TARGET mlir_apfloat_wrappers PROPERTY CXX_STANDARD 17) + target_compile_definitions(mlir_apfloat_wrappers PRIVATE mlir_apfloat_wrappers_EXPORTS) + add_subdirectory(SparseTensor) add_mlir_library(mlir_c_runner_utils @@ -177,6 +187,7 @@ if(LLVM_ENABLE_PIC) EXCLUDE_FROM_LIBMLIR LINK_LIBS PUBLIC + mlir_apfloat_wrappers mlir_float16_utils MLIRSparseTensorEnums MLIRSparseTensorRuntime @@ -191,6 +202,7 @@ if(LLVM_ENABLE_PIC) EXCLUDE_FROM_LIBMLIR LINK_LIBS PUBLIC + mlir_apfloat_wrappers mlir_float16_utils ) target_compile_definitions(mlir_runner_utils PRIVATE mlir_runner_utils_EXPORTS) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 9945a711d5c74..365bfddeaab73 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/ScopedPrinter.h" #include +#include using namespace mlir; using namespace mlir::detail; @@ -1051,7 +1052,7 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { MLIRContext *context, std::function opErasedCallback = nullptr) : RewriterBase(context, /*listener=*/this), - opErasedCallback(opErasedCallback) {} + opErasedCallback(std::move(opErasedCallback)) {} /// Erase the given op (unless it was already erased). 
void eraseOp(Operation *op) override { diff --git a/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir b/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir new file mode 100644 index 0000000000000..797f42c37a26f --- /dev/null +++ b/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir @@ -0,0 +1,128 @@ +// RUN: mlir-opt %s --convert-arith-to-apfloat -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK-LABEL: func.func private @_mlir_apfloat_add(i32, i64, i64) -> i64 + +// CHECK-LABEL: func.func @foo() -> f8E4M3FN { +// CHECK: %[[CONSTANT_0:.*]] = arith.constant 2.250000e+00 : f8E4M3FN +// CHECK: return %[[CONSTANT_0]] : f8E4M3FN +// CHECK: } + +// CHECK-LABEL: func.func @bar() -> f6E3M2FN { +// CHECK: %[[CONSTANT_0:.*]] = arith.constant 3.000000e+00 : f6E3M2FN +// CHECK: return %[[CONSTANT_0]] : f6E3M2FN +// CHECK: } + +// Illustrate that both f8E4M3FN and f6E3M2FN calling the same _mlir_apfloat_add is fine +// because each gets its own semantics enum and gets bitcast/extui/trunci to its own width. 
+// CHECK-LABEL: func.func @full_example() { +// CHECK: %[[CONSTANT_0:.*]] = arith.constant 1.375000e+00 : f8E4M3FN +// CHECK: %[[VAL_0:.*]] = call @foo() : () -> f8E4M3FN +// CHECK: %[[BITCAST_0:.*]] = arith.bitcast %[[CONSTANT_0]] : f8E4M3FN to i8 +// CHECK: %[[EXTUI_0:.*]] = arith.extui %[[BITCAST_0]] : i8 to i64 +// CHECK: %[[BITCAST_1:.*]] = arith.bitcast %[[VAL_0]] : f8E4M3FN to i8 +// CHECK: %[[EXTUI_1:.*]] = arith.extui %[[BITCAST_1]] : i8 to i64 +// // fltSemantics semantics for f8E4M3FN +// CHECK: %[[CONSTANT_1:.*]] = arith.constant 10 : i32 +// CHECK: %[[VAL_1:.*]] = call @_mlir_apfloat_add(%[[CONSTANT_1]], %[[EXTUI_0]], %[[EXTUI_1]]) : (i32, i64, i64) -> i64 +// CHECK: %[[TRUNCI_0:.*]] = arith.trunci %[[VAL_1]] : i64 to i8 +// CHECK: %[[BITCAST_2:.*]] = arith.bitcast %[[TRUNCI_0]] : i8 to f8E4M3FN +// CHECK: vector.print %[[BITCAST_2]] : f8E4M3FN + +// CHECK: %[[CONSTANT_2:.*]] = arith.constant 2.500000e+00 : f6E3M2FN +// CHECK: %[[VAL_2:.*]] = call @bar() : () -> f6E3M2FN +// CHECK: %[[BITCAST_3:.*]] = arith.bitcast %[[CONSTANT_2]] : f6E3M2FN to i6 +// CHECK: %[[EXTUI_2:.*]] = arith.extui %[[BITCAST_3]] : i6 to i64 +// CHECK: %[[BITCAST_4:.*]] = arith.bitcast %[[VAL_2]] : f6E3M2FN to i6 +// CHECK: %[[EXTUI_3:.*]] = arith.extui %[[BITCAST_4]] : i6 to i64 +// // fltSemantics semantics for f6E3M2FN +// CHECK: %[[CONSTANT_3:.*]] = arith.constant 16 : i32 +// CHECK: %[[VAL_3:.*]] = call @_mlir_apfloat_add(%[[CONSTANT_3]], %[[EXTUI_2]], %[[EXTUI_3]]) : (i32, i64, i64) -> i64 +// CHECK: %[[TRUNCI_1:.*]] = arith.trunci %[[VAL_3]] : i64 to i6 +// CHECK: %[[BITCAST_5:.*]] = arith.bitcast %[[TRUNCI_1]] : i6 to f6E3M2FN +// CHECK: vector.print %[[BITCAST_5]] : f6E3M2FN +// CHECK: return +// CHECK: } + +// Put rhs into separate function so that it won't be constant-folded. 
+func.func @foo() -> f8E4M3FN { + %cst = arith.constant 2.2 : f8E4M3FN + return %cst : f8E4M3FN +} + +func.func @bar() -> f6E3M2FN { + %cst = arith.constant 3.2 : f6E3M2FN + return %cst : f6E3M2FN +} + +func.func @full_example() { + %a = arith.constant 1.4 : f8E4M3FN + %b = func.call @foo() : () -> (f8E4M3FN) + %c = arith.addf %a, %b : f8E4M3FN + vector.print %c : f8E4M3FN + + %d = arith.constant 2.4 : f6E3M2FN + %e = func.call @bar() : () -> (f6E3M2FN) + %f = arith.addf %d, %e : f6E3M2FN + vector.print %f : f6E3M2FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_add(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_add(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @addf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.addf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// Test decl collision (different type) +// expected-error@+1{{matched function '_mlir_apfloat_add' but with different type: '(i32, i32, f32) -> index' (expected '(i32, i64, i64) -> i64')}} +func.func private @_mlir_apfloat_add(i32, i32, f32) -> index +func.func @addf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.addf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_subtract(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_subtract(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @subf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.subf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_multiply(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_multiply(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @subf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.mulf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_divide(i32, i64, i64) -> i64 
+// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_divide(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @subf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.divf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_remainder(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_remainder(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @remf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.remf %arg0, %arg1 : f4E2M1FN + return +} diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 22fde3b7d28a5..80886c31cb58f 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -1276,6 +1276,42 @@ func.func @test_matmul_t_block_scaled_mxint8(%arg0: tensor<4x8x32x!tosa.mxint8>, return %0 : tensor<4x8x16xf32> } +// ----- +// CHECK-LABEL: test_matmul_t_block_scaled_fp6e3m2_e2e +func.func @test_matmul_t_block_scaled_fp6e3m2_e2e(%arg0: tensor<6x2x32xf32>, %arg1: tensor<6x64x32xf32>) -> tensor<6x2x64xf32> { + %a, %sa = tosa.cast_to_block_scaled %arg0 {block_size = #tosa.block_size : i32} : (tensor<6x2x32xf32>) -> (tensor<6x2x32xf6E3M2FN>, tensor<6x2x1xf8E8M0FNU>) + %b, %sb = tosa.cast_to_block_scaled %arg1 {block_size = #tosa.block_size : i32} : (tensor<6x64x32xf32>) -> (tensor<6x64x32xf6E3M2FN>, tensor<6x64x1xf8E8M0FNU>) + %res = tosa.matmul_t_block_scaled %a, %sa, %b, %sb {block_size = #tosa.block_size : i32} : (tensor<6x2x32xf6E3M2FN>, tensor<6x2x1xf8E8M0FNU>, tensor<6x64x32xf6E3M2FN>, tensor<6x64x1xf8E8M0FNU>) -> tensor<6x2x64xf32> + return %res : tensor<6x2x64xf32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_block_scaled_fp6e2m3_e2e +func.func @test_matmul_t_block_scaled_fp6e2m3_e2e(%arg0: tensor<6x2x32xf32>, %arg1: tensor<6x64x32xf32>) -> tensor<6x2x64xf32> { + %a, %sa = tosa.cast_to_block_scaled %arg0 {block_size = #tosa.block_size : i32} : (tensor<6x2x32xf32>) -> 
(tensor<6x2x32xf6E2M3FN>, tensor<6x2x1xf8E8M0FNU>) + %b, %sb = tosa.cast_to_block_scaled %arg1 {block_size = #tosa.block_size : i32} : (tensor<6x64x32xf32>) -> (tensor<6x64x32xf6E2M3FN>, tensor<6x64x1xf8E8M0FNU>) + %res = tosa.matmul_t_block_scaled %a, %sa, %b, %sb {block_size = #tosa.block_size : i32} : (tensor<6x2x32xf6E2M3FN>, tensor<6x2x1xf8E8M0FNU>, tensor<6x64x32xf6E2M3FN>, tensor<6x64x1xf8E8M0FNU>) -> tensor<6x2x64xf32> + return %res : tensor<6x2x64xf32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_block_scaled_fp4e2m1_e2e +func.func @test_matmul_t_block_scaled_fp4e2m1_e2e(%arg0: tensor<6x2x32xf32>, %arg1: tensor<6x64x32xf32>) -> tensor<6x2x64xf32> { + %a, %sa = tosa.cast_to_block_scaled %arg0 {block_size = #tosa.block_size : i32} : (tensor<6x2x32xf32>) -> (tensor<6x2x32xf4E2M1FN>, tensor<6x2x1xf8E8M0FNU>) + %b, %sb = tosa.cast_to_block_scaled %arg1 {block_size = #tosa.block_size : i32} : (tensor<6x64x32xf32>) -> (tensor<6x64x32xf4E2M1FN>, tensor<6x64x1xf8E8M0FNU>) + %res = tosa.matmul_t_block_scaled %a, %sa, %b, %sb {block_size = #tosa.block_size : i32} : (tensor<6x2x32xf4E2M1FN>, tensor<6x2x1xf8E8M0FNU>, tensor<6x64x32xf4E2M1FN>, tensor<6x64x1xf8E8M0FNU>) -> tensor<6x2x64xf32> + return %res : tensor<6x2x64xf32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_block_scaled_mxint8_e2e +func.func @test_matmul_t_block_scaled_mxint8_e2e(%arg0: tensor<6x2x32xf32>, %arg1: tensor<6x64x32xf32>) -> tensor<6x2x64xf32> { + %a, %sa = tosa.cast_to_block_scaled %arg0 {block_size = #tosa.block_size : i32} : (tensor<6x2x32xf32>) -> (tensor<6x2x32x!tosa.mxint8>, tensor<6x2x1xf8E8M0FNU>) + %b, %sb = tosa.cast_to_block_scaled %arg1 {block_size = #tosa.block_size : i32} : (tensor<6x64x32xf32>) -> (tensor<6x64x32x!tosa.mxint8>, tensor<6x64x1xf8E8M0FNU>) + %res = tosa.matmul_t_block_scaled %a, %sa, %b, %sb {block_size = #tosa.block_size : i32} : (tensor<6x2x32x!tosa.mxint8>, tensor<6x2x1xf8E8M0FNU>, tensor<6x64x32x!tosa.mxint8>, tensor<6x64x1xf8E8M0FNU>) -> tensor<6x2x64xf32> + 
return %res : tensor<6x2x64xf32> +} + // ----- // CHECK-LABEL: test_cast_from_block_scaled_static func.func @test_cast_from_block_scaled_static(%arg0: tensor<4x32xf4E2M1FN>, %arg1: tensor<4x1xf8E8M0FNU>) -> tensor<4x32xf32> { @@ -1307,7 +1343,7 @@ func.func @test_cast_to_block_scaled_unranked(%arg0: tensor<*xf32>) -> (tensor<* // ----- // CHECK-LABEL: test_cast_to_block_scaled_mxint8 func.func @test_cast_to_block_scaled_mxint8(%arg0: tensor<4x32xf32>) -> (tensor<4x32x!tosa.mxint8>, tensor<4x1xf8E8M0FNU>) { - %0:2 = tosa.cast_to_block_scaled %arg0 {block_size = #tosa.block_size : i32, stochastic_round = false} : (tensor<4x32xf32>) -> (tensor<4x32x!tosa.mxint8>, tensor<4x1xf8E8M0FNU>) + %0:2 = tosa.cast_to_block_scaled %arg0 {block_size = #tosa.block_size : i32} : (tensor<4x32xf32>) -> (tensor<4x32x!tosa.mxint8>, tensor<4x1xf8E8M0FNU>) return %0#0, %0#1 : tensor<4x32x!tosa.mxint8>, tensor<4x1xf8E8M0FNU> } diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir new file mode 100644 index 0000000000000..2768afe0834b5 --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir @@ -0,0 +1,36 @@ +// Case 1: All floating-point arithmetics is lowered through APFloat. +// RUN: mlir-opt %s --convert-arith-to-apfloat --convert-to-llvm | \ +// RUN: mlir-runner -e entry --entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils \ +// RUN: --shared-libs=%mlir_apfloat_wrappers | FileCheck %s + +// Case 2: Only unsupported arithmetics (f8E4M3FN) is lowered through APFloat. +// Arithmetics on f32 is lowered directly to LLVM. 
+// RUN: mlir-opt %s --convert-to-llvm --convert-arith-to-apfloat \ +// RUN: --convert-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-runner -e entry --entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils \ +// RUN: --shared-libs=%mlir_apfloat_wrappers | FileCheck %s + +// Put rhs into separate function so that it won't be constant-folded. +func.func @foo() -> (f8E4M3FN, f32) { + %cst1 = arith.constant 2.2 : f8E4M3FN + %cst2 = arith.constant 2.2 : f32 + return %cst1, %cst2 : f8E4M3FN, f32 +} + +func.func @entry() { + %a1 = arith.constant 1.4 : f8E4M3FN + %a2 = arith.constant 1.4 : f32 + %b1, %b2 = func.call @foo() : () -> (f8E4M3FN, f32) + %c1 = arith.addf %a1, %b1 : f8E4M3FN // not supported by LLVM + %c2 = arith.addf %a2, %b2 : f32 // supported by LLVM + + // CHECK: 3.5 + vector.print %c1 : f8E4M3FN + + // CHECK: 3.6 + vector.print %c2 : f32 + + return +} diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index 6ff12d66523f5..4a38ed605be0c 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -208,6 +208,7 @@ def find_real_python_interpreter(): add_runtime("mlir_c_runner_utils"), add_runtime("mlir_async_runtime"), add_runtime("mlir_float16_utils"), + add_runtime("mlir_apfloat_wrappers"), "mlir-linalg-ods-yaml-gen", "mlir-reduce", "mlir-pdll", diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py index f5fa4dad856f8..1bdd345d98c05 100644 --- a/mlir/test/python/ir/operation.py +++ b/mlir/test/python/ir/operation.py @@ -2,12 +2,12 @@ import gc import io -import itertools from tempfile import NamedTemporaryFile from mlir.ir import * from mlir.dialects.builtin import ModuleOp -from mlir.dialects import arith +from mlir.dialects import arith, func, scf from mlir.dialects._ods_common import _cext +from mlir.extras import types as T def run(f): @@ -1199,3 +1199,27 @@ def testGetOwnerConcreteOpview(): r = arith.AddIOp(a, a, overflowFlags=arith.IntegerOverflowFlags.nsw) for u in a.result.uses: assert 
isinstance(u.owner, arith.AddIOp) + + +# CHECK-LABEL: TEST: testIndexSwitch +@run +def testIndexSwitch(): + with Context() as ctx, Location.unknown(): + i32 = T.i32() + module = Module.create() + with InsertionPoint(module.body): + + @func.FuncOp.from_py_func(T.index()) + def index_switch(index): + c1 = arith.constant(i32, 1) + switch_op = scf.IndexSwitchOp( + results_=[i32], arg=index, cases=range(3), num_caseRegions=3 + ) + + assert len(switch_op.regions) == 4 + assert len(switch_op.regions[2:]) == 2 + assert len([i for i in switch_op.regions[2:]]) == 2 + assert len(switch_op.caseRegions) == 3 + assert len([i for i in switch_op.caseRegions]) == 3 + assert len(switch_op.caseRegions[1:]) == 2 + assert len([i for i in switch_op.caseRegions[1:]]) == 2 diff --git a/mlir/tools/tblgen-to-irdl/tblgen-to-irdl.cpp b/mlir/tools/tblgen-to-irdl/tblgen-to-irdl.cpp index 092ec2ebe81a6..33421b4e2ddd6 100644 --- a/mlir/tools/tblgen-to-irdl/tblgen-to-irdl.cpp +++ b/mlir/tools/tblgen-to-irdl/tblgen-to-irdl.cpp @@ -18,10 +18,11 @@ using namespace llvm; using namespace mlir; // Generator that prints records. 
-GenRegistration printRecords("print-records", "Print all records to stdout", - [](const RecordKeeper &records, raw_ostream &os) { - os << records; - return false; - }); +static GenRegistration + printRecords("print-records", "Print all records to stdout", + [](const RecordKeeper &records, raw_ostream &os) { + os << records; + return false; + }); int main(int argc, char **argv) { return MlirTblgenMain(argc, argv); } diff --git a/revert_patches.txt b/revert_patches.txt index 6be344f862546..b88b846f64b68 100644 --- a/revert_patches.txt +++ b/revert_patches.txt @@ -8,6 +8,3 @@ breaks build of ROCmValidationSuite breaks fortran declare-target-link1 [OMPIRBuilder] Fix addrspace of internal critical section lock (#166459 --- -complicated to land parallel-EQ -Reland "[clang] Refactor option-related code from clangDriver into new clangOptions library" (#167374) ---- diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 4ee23d19abf12..deb56dc0957e9 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -1502,9 +1502,9 @@ cc_library( gentbl_cc_library( name = "driver_options_inc_gen", - tbl_outs = {"include/clang/Driver/Options.inc": ["-gen-opt-parser-defs"]}, + tbl_outs = {"include/clang/Options/Options.inc": ["-gen-opt-parser-defs"]}, tblgen = "//llvm:llvm-tblgen", - td_file = "include/clang/Driver/Options.td", + td_file = "include/clang/Options/Options.td", deps = ["//llvm:OptParserTdFiles"], ) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 7066f498c7d49..674a3ec8e18ed 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -3881,6 +3881,7 @@ cc_library( includes = ["include"], deps = [ ":DialectUtils", + ":GPUDialect", ":IR", ":SCFDialect", ":TransformDialect",