diff --git a/.ci/generate_test_report_github.py b/.ci/generate_test_report_github.py index 6785e82f3440b..08387de817467 100644 --- a/.ci/generate_test_report_github.py +++ b/.ci/generate_test_report_github.py @@ -8,6 +8,7 @@ import generate_test_report_lib + def compute_platform_title() -> str: logo = ":window:" if platform.system() == "Windows" else ":penguin:" # On Linux the machine value is x86_64 on Windows it is AMD64. diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py index 7820fbda803d7..0c025c561f6f7 100644 --- a/.ci/generate_test_report_lib.py +++ b/.ci/generate_test_report_lib.py @@ -100,6 +100,7 @@ def _format_ninja_failures(ninja_failures: list[tuple[str, str]]) -> list[str]: ) return output + def get_failures(junit_objects) -> dict[str, list[tuple[str, str]]]: failures = {} for results in junit_objects: diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index 4b827b647b06c..f872db2cae0ce 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -1018,41 +1018,15 @@ void Relocation::print(raw_ostream &OS) const { default: OS << "RType:" << Twine::utohexstr(Type); break; - - case Triple::aarch64: { - static const char *const AArch64RelocNames[] = { -#define ELF_RELOC(name, value) #name, -#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" -#undef ELF_RELOC - }; - assert(Type < ArrayRef(AArch64RelocNames).size()); - OS << AArch64RelocNames[Type]; - } break; - + case Triple::aarch64: + OS << object::getELFRelocationTypeName(ELF::EM_AARCH64, Type); + break; case Triple::riscv64: - // RISC-V relocations are not sequentially numbered so we cannot use an - // array - switch (Type) { - default: - llvm_unreachable("illegal RISC-V relocation"); -#define ELF_RELOC(name, value) \ - case value: \ - OS << #name; \ + OS << object::getELFRelocationTypeName(ELF::EM_RISCV, Type); break; -#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" -#undef ELF_RELOC - } + case Triple::x86_64: + OS << object::getELFRelocationTypeName(ELF::EM_X86_64, Type); break; - - case Triple::x86_64: { - static const char *const X86RelocNames[] = { -#define ELF_RELOC(name, value) #name, -#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" -#undef ELF_RELOC - }; - assert(Type < ArrayRef(X86RelocNames).size()); - OS << X86RelocNames[Type]; - } break; } OS << ", 0x" << Twine::utohexstr(Offset); if (Symbol) { diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 57db6a436c5c6..3c77091d91ebd 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -640,7 +640,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { Insts[1].addOperand(MCOperand::createImm(0)); Insts[1].addOperand(MCOperand::createImm(0)); setOperandToSymbolRef(Insts[1], /* OpNum */ 2, Target, 0, Ctx, - ELF::R_AARCH64_ADD_ABS_LO12_NC); + isLDRXl(LDRInst) ? ELF::R_AARCH64_LDST64_ABS_LO12_NC + : ELF::R_AARCH64_LDST32_ABS_LO12_NC); return Insts; } diff --git a/bolt/test/AArch64/relocation-type-print.s b/bolt/test/AArch64/relocation-type-print.s new file mode 100644 index 0000000000000..111cbbb94bc54 --- /dev/null +++ b/bolt/test/AArch64/relocation-type-print.s @@ -0,0 +1,24 @@ +## Verify that llvm-bolt correctly prints relocation types. + +# REQUIRES: system-linux + +# RUN: %clang %cflags -nostartfiles %s -o %t.exe -Wl,-q,--no-relax +# RUN: llvm-bolt %t.exe --print-cfg --print-relocations -o %t.bolt \ +# RUN: | FileCheck %s + + .section .text + .align 4 + .globl _start + .type _start, %function +_start: + + adrp x0, _start +# CHECK: adrp +# CHECK-SAME: R_AARCH64_ADR_PREL_PG_HI21 + + add x0, x0, :lo12:_start +# CHECK-NEXT: add +# CHECK-SAME: R_AARCH64_ADD_ABS_LO12_NC + + ret + .size _start, .-_start diff --git a/bolt/test/runtime/AArch64/inline-memcpy.s b/bolt/test/runtime/AArch64/inline-memcpy.s index badff299603a0..75066c855b9ed 100644 --- a/bolt/test/runtime/AArch64/inline-memcpy.s +++ b/bolt/test/runtime/AArch64/inline-memcpy.s @@ -81,14 +81,14 @@ # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}} { /// before calling this method. unsigned getColumnNumber(FileID FID, unsigned FilePos, bool *Invalid = nullptr) const; + unsigned getColumnNumber(SourceLocation Loc, bool *Invalid = nullptr) const; unsigned getSpellingColumnNumber(SourceLocation Loc, - bool *Invalid = nullptr) const; + bool *Invalid = nullptr) const { + return getColumnNumber(getSpellingLoc(Loc), Invalid); + } unsigned getExpansionColumnNumber(SourceLocation Loc, - bool *Invalid = nullptr) const; + bool *Invalid = nullptr) const { + return getColumnNumber(getExpansionLoc(Loc), Invalid); + } unsigned getPresumedColumnNumber(SourceLocation Loc, bool *Invalid = nullptr) const; @@ -1423,8 +1428,15 @@ class SourceManager : public RefCountedBase { /// MemoryBuffer, so this is not cheap: use only when about to emit a /// diagnostic. unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid = nullptr) const; - unsigned getSpellingLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; - unsigned getExpansionLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; + unsigned getLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; + unsigned getSpellingLineNumber(SourceLocation Loc, + bool *Invalid = nullptr) const { + return getLineNumber(getSpellingLoc(Loc), Invalid); + } + unsigned getExpansionLineNumber(SourceLocation Loc, + bool *Invalid = nullptr) const { + return getLineNumber(getExpansionLoc(Loc), Invalid); + } unsigned getPresumedLineNumber(SourceLocation Loc, bool *Invalid = nullptr) const; /// Return the filename or buffer identifier of the buffer the diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 4a2b77cd16bfc..d1c959b9687c4 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -77,57 +77,6 @@ IdentifierTable::IdentifierTable(const LangOptions &LangOpts, // Language Keyword Implementation //===----------------------------------------------------------------------===// -// Constants for TokenKinds.def -namespace { - -enum TokenKey : unsigned { - KEYC99 = 0x1, - KEYCXX = 0x2, - KEYCXX11 = 0x4, - KEYGNU = 0x8, - KEYMS = 0x10, - BOOLSUPPORT = 0x20, - KEYALTIVEC = 0x40, - KEYNOCXX = 0x80, - KEYBORLAND = 0x100, - KEYOPENCLC = 0x200, - KEYC23 = 0x400, - KEYNOMS18 = 0x800, - KEYNOOPENCL = 0x1000, - WCHARSUPPORT = 0x2000, - HALFSUPPORT = 0x4000, - CHAR8SUPPORT = 0x8000, - KEYOBJC = 0x10000, - KEYZVECTOR = 0x20000, - KEYCOROUTINES = 0x40000, - KEYMODULES = 0x80000, - KEYCXX20 = 0x100000, - KEYOPENCLCXX = 0x200000, - KEYMSCOMPAT = 0x400000, - KEYSYCL = 0x800000, - KEYCUDA = 0x1000000, - KEYZOS = 0x2000000, - KEYNOZOS = 0x4000000, - KEYHLSL = 0x8000000, - KEYFIXEDPOINT = 0x10000000, - KEYMAX = KEYFIXEDPOINT, // The maximum key - KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20, - KEYALL = (KEYMAX | (KEYMAX - 1)) & ~KEYNOMS18 & ~KEYNOOPENCL & - ~KEYNOZOS // KEYNOMS18, KEYNOOPENCL, KEYNOZOS are excluded. -}; - -/// How a keyword is treated in the selected standard. This enum is ordered -/// intentionally so that the value that 'wins' is the most 'permissive'. -enum KeywordStatus { - KS_Unknown, // Not yet calculated. Used when figuring out the status. - KS_Disabled, // Disabled - KS_Future, // Is a keyword in future standard - KS_Extension, // Is an extension - KS_Enabled, // Enabled -}; - -} // namespace - // This works on a single TokenKey flag and checks the LangOpts to get the // KeywordStatus based exclusively on this flag, so that it can be merged in // getKeywordStatus. Most should be enabled/disabled, but some might imply @@ -220,9 +169,7 @@ static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts, } } -/// Translates flags as specified in TokenKinds.def into keyword status -/// in the given language standard. -static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, +KeywordStatus clang::getKeywordStatus(const LangOptions &LangOpts, unsigned Flags) { // KEYALL means always enabled, so special case this one. if (Flags == KEYALL) return KS_Enabled; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 7dc81c50f87a2..b6cc6ec9365f5 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -1159,17 +1159,11 @@ static bool isInvalid(LocType Loc, bool *Invalid) { return MyInvalid; } -unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc, - bool *Invalid) const { - if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset LocInfo = getDecomposedSpellingLoc(Loc); - return getColumnNumber(LocInfo.first, LocInfo.second, Invalid); -} - -unsigned SourceManager::getExpansionColumnNumber(SourceLocation Loc, - bool *Invalid) const { +unsigned SourceManager::getColumnNumber(SourceLocation Loc, + bool *Invalid) const { + assert(Loc.isFileID()); if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset LocInfo = getDecomposedExpansionLoc(Loc); + FileIDAndOffset LocInfo = getDecomposedLoc(Loc); return getColumnNumber(LocInfo.first, LocInfo.second, Invalid); } @@ -1367,18 +1361,13 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos, return LineNo; } -unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc, - bool *Invalid) const { - if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset LocInfo = getDecomposedSpellingLoc(Loc); - return getLineNumber(LocInfo.first, LocInfo.second); -} -unsigned SourceManager::getExpansionLineNumber(SourceLocation Loc, - bool *Invalid) const { +unsigned SourceManager::getLineNumber(SourceLocation Loc, bool *Invalid) const { + assert(Loc.isFileID()); if (isInvalid(Loc, Invalid)) return 0; - FileIDAndOffset LocInfo = getDecomposedExpansionLoc(Loc); + FileIDAndOffset LocInfo = getDecomposedLoc(Loc); return getLineNumber(LocInfo.first, LocInfo.second); } + unsigned SourceManager::getPresumedLineNumber(SourceLocation Loc, bool *Invalid) const { PresumedLoc PLoc = getPresumedLoc(Loc); diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index ac9c81d4416c9..d31d656a63fc5 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -285,7 +285,8 @@ class LineJoiner { if (Tok && Tok->is(tok::kw_typedef)) Tok = Tok->getNextNonComment(); if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, - tok::kw_extern, Keywords.kw_interface)) { + tok::kw_extern, Keywords.kw_interface, + Keywords.kw_record)) { return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock ? tryMergeSimpleBlock(I, E, Limit) : 0; @@ -498,7 +499,8 @@ class LineJoiner { ShouldMerge = Style.AllowShortEnumsOnASingleLine; } else if (TheLine->Last->is(TT_CompoundRequirementLBrace)) { ShouldMerge = Style.AllowShortCompoundRequirementOnASingleLine; - } else if (TheLine->Last->isOneOf(TT_ClassLBrace, TT_StructLBrace)) { + } else if (TheLine->Last->isOneOf(TT_ClassLBrace, TT_StructLBrace, + TT_RecordLBrace)) { // NOTE: We use AfterClass (whereas AfterStruct exists) for both classes // and structs, but it seems that wrapping is still handled correctly // elsewhere. @@ -507,7 +509,7 @@ class LineJoiner { !Style.BraceWrapping.SplitEmptyRecord); } else if (TheLine->InPPDirective || TheLine->First->isNoneOf(tok::kw_class, tok::kw_enum, - tok::kw_struct)) { + tok::kw_struct, Keywords.kw_record)) { // Try to merge a block with left brace unwrapped that wasn't yet // covered. ShouldMerge = !Style.BraceWrapping.AfterFunction || diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 5e2584edac8f4..8b7dd02d548af 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -948,7 +948,11 @@ static bool isIIFE(const UnwrappedLine &Line, } static bool ShouldBreakBeforeBrace(const FormatStyle &Style, - const FormatToken &InitialToken) { + const FormatToken &InitialToken, + const bool IsJavaRecord) { + if (IsJavaRecord) + return Style.BraceWrapping.AfterClass; + tok::TokenKind Kind = InitialToken.Tok.getKind(); if (InitialToken.is(TT_NamespaceMacro)) Kind = tok::kw_namespace; @@ -3200,7 +3204,7 @@ void UnwrappedLineParser::parseNamespace() { if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_NamespaceLBrace); - if (ShouldBreakBeforeBrace(Style, InitialToken)) + if (ShouldBreakBeforeBrace(Style, InitialToken, /*IsJavaRecord=*/false)) addUnwrappedLine(); unsigned AddLevels = @@ -3865,7 +3869,7 @@ bool UnwrappedLineParser::parseEnum() { } if (!Style.AllowShortEnumsOnASingleLine && - ShouldBreakBeforeBrace(Style, InitialToken)) { + ShouldBreakBeforeBrace(Style, InitialToken, /*IsJavaRecord=*/false)) { addUnwrappedLine(); } // Parse enum body. @@ -4160,7 +4164,7 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) { if (ParseAsExpr) { parseChildBlock(); } else { - if (ShouldBreakBeforeBrace(Style, InitialToken)) + if (ShouldBreakBeforeBrace(Style, InitialToken, IsJavaRecord)) addUnwrappedLine(); unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; diff --git a/clang/lib/Sema/SemaFunctionEffects.cpp b/clang/lib/Sema/SemaFunctionEffects.cpp index 4b63eb7df1054..12cc02965e7d3 100644 --- a/clang/lib/Sema/SemaFunctionEffects.cpp +++ b/clang/lib/Sema/SemaFunctionEffects.cpp @@ -1302,6 +1302,14 @@ class Analyzer { return true; } + bool TraverseCXXRecordDecl(CXXRecordDecl *D) override { + // Completely skip local struct/class/union declarations since their + // methods would otherwise be incorrectly interpreted as part of the + // function we are currently traversing. The initial Sema pass will have + // already recorded any nonblocking methods needing analysis. + return true; + } + bool TraverseConstructorInitializer(CXXCtorInitializer *Init) override { ViolationSite PrevVS = VSite; if (Init->isAnyMemberInitializer()) diff --git a/clang/test/Sema/attr-nonblocking-constraints.cpp b/clang/test/Sema/attr-nonblocking-constraints.cpp index 881e816292d59..012c017798a1f 100644 --- a/clang/test/Sema/attr-nonblocking-constraints.cpp +++ b/clang/test/Sema/attr-nonblocking-constraints.cpp @@ -104,6 +104,25 @@ void nb8c() }; } +void nb8d() [[clang::nonblocking]] +{ + // Blocking methods of a local CXXRecordDecl do not generate diagnostics + // for the outer function. + struct F1 { + void method() { void* ptr = new int; } + }; + + // Skipping the CXXRecordDecl does not skip a following VarDecl. + struct F2 { + F2() { void* ptr = new int; } // expected-note {{constructor cannot be inferred 'nonblocking' because it allocates or deallocates memory}} + } f2; // expected-warning {{function with 'nonblocking' attribute must not call non-'nonblocking' constructor 'nb8d()::F2::F2'}} + + // Nonblocking methods of a local CXXRecordDecl are verified independently. + struct F3 { + void method() [[clang::nonblocking]] { void* ptr = new int; }// expected-warning {{function with 'nonblocking' attribute must not allocate or deallocate memory}} + }; +} + // Make sure template expansions are found and verified. template struct Adder { diff --git a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip index 8ee64d486f4f4..fea86162c801d 100644 --- a/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip +++ b/clang/test/SemaHIP/builtins-amdgcn-raw-buffer-atomic-add.hip @@ -14,5 +14,9 @@ __device__ void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float __device__ void test_raw_ptr_atomics_err(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) { i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}} f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}} - v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0, 4); + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0, 4); // expected-error{{too many arguments to function call}} +} + +__device__ void test_raw_ptr_atomics_f16_retty(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset) { + v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); } diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 1416614bae29a..3cc97e2dc0b2e 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -848,6 +848,19 @@ TEST_F(FormatTestJava, TextBlock) { " Pat Q. Smith"); } +TEST_F(FormatTestJava, BreakAfterRecord) { + auto Style = getLLVMStyle(FormatStyle::LK_Java); + Style.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_Never; + Style.BreakBeforeBraces = FormatStyle::BS_Custom; + Style.BraceWrapping.AfterClass = true; + Style.BraceWrapping.SplitEmptyRecord = true; + + verifyFormat("public record Foo(int i)\n" + "{\n" + "}", + "public record Foo(int i) {}", Style); +} + } // namespace } // namespace test } // namespace format diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index ef58da61e371b..bb55a8163d938 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -348,6 +348,7 @@ struct NodeVisitor { READ_FEATURE(TeamValue) READ_FEATURE(ImageSelector) READ_FEATURE(ImageSelectorSpec) + READ_FEATURE(ImageSelectorSpec::Notify) READ_FEATURE(ImageSelectorSpec::Stat) READ_FEATURE(ImageSelectorSpec::Team_Number) READ_FEATURE(ImplicitPart) diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h index 48aafa8982559..d63c16f93230a 100644 --- a/flang/include/flang/Evaluate/traverse.h +++ b/flang/include/flang/Evaluate/traverse.h @@ -146,7 +146,7 @@ class Traverse { return Combine(x.base(), x.subscript()); } Result operator()(const CoarrayRef &x) const { - return Combine(x.base(), x.cosubscript(), x.stat(), x.team()); + return Combine(x.base(), x.cosubscript(), x.notify(), x.stat(), x.team()); } Result operator()(const DataRef &x) const { return visitor_(x.u); } Result operator()(const Substring &x) const { diff --git a/flang/include/flang/Evaluate/variable.h b/flang/include/flang/Evaluate/variable.h index 5c14421fd3a1b..4f64ede3d407d 100644 --- a/flang/include/flang/Evaluate/variable.h +++ b/flang/include/flang/Evaluate/variable.h @@ -260,6 +260,9 @@ class CoarrayRef { // it's TEAM=. std::optional> team() const; CoarrayRef &set_team(Expr &&); + // When notify() is Expr, it's NOTIFY=. + std::optional> notify() const; + CoarrayRef &set_notify(Expr &&); int Rank() const; int Corank() const { return 0; } @@ -272,6 +275,7 @@ class CoarrayRef { private: common::CopyableIndirection base_; std::vector> cosubscript_; + std::optional>> notify_; std::optional>> stat_; std::optional>> team_; }; diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index de2716410d6cd..b2424023b0168 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -387,6 +387,7 @@ class ParseTreeDumper { NODE(parser, TeamValue) NODE(parser, ImageSelector) NODE(parser, ImageSelectorSpec) + NODE(ImageSelectorSpec, Notify) NODE(ImageSelectorSpec, Stat) NODE(ImageSelectorSpec, Team_Number) NODE(parser, ImplicitPart) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 8c7578f7a1941..32e444fbb2e6c 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -1684,13 +1684,15 @@ using Cosubscript = ScalarIntExpr; WRAPPER_CLASS(TeamValue, Scalar>); // R926 image-selector-spec -> +// NOTIFY = notify-variable | // STAT = stat-variable | TEAM = team-value | // TEAM_NUMBER = scalar-int-expr struct ImageSelectorSpec { WRAPPER_CLASS(Stat, Scalar>>); WRAPPER_CLASS(Team_Number, ScalarIntExpr); + WRAPPER_CLASS(Notify, Scalar>); UNION_CLASS_BOILERPLATE(ImageSelectorSpec); - std::variant u; + std::variant u; }; // R924 image-selector -> diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 8a7b9867c0979..1c3477013b559 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -107,6 +107,7 @@ bool IsBindCProcedure(const Scope &); // Returns a pointer to the function's symbol when true, else null const Symbol *IsFunctionResultWithSameNameAsFunction(const Symbol &); bool IsOrContainsEventOrLockComponent(const Symbol &); +bool IsOrContainsNotifyComponent(const Symbol &); bool CanBeTypeBoundProc(const Symbol &); // Does a non-PARAMETER symbol have explicit initialization with =value or // =>target in its declaration (but not in a DATA statement)? (Being @@ -652,6 +653,8 @@ using PotentialAndPointerComponentIterator = // dereferenced. PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( const DerivedTypeSpec &, bool ignoreCoarrays = false); +PotentialComponentIterator::const_iterator FindNotifyPotentialComponent( + const DerivedTypeSpec &, bool ignoreCoarrays = false); PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent( const DerivedTypeSpec &); PotentialAndPointerComponentIterator::const_iterator diff --git a/flang/lib/Evaluate/variable.cpp b/flang/lib/Evaluate/variable.cpp index b9b34d4d5bc89..b257dad42fc58 100644 --- a/flang/lib/Evaluate/variable.cpp +++ b/flang/lib/Evaluate/variable.cpp @@ -89,6 +89,14 @@ std::optional> CoarrayRef::team() const { } } +std::optional> CoarrayRef::notify() const { + if (notify_) { + return notify_.value().value(); + } else { + return std::nullopt; + } +} + CoarrayRef &CoarrayRef::set_stat(Expr &&v) { CHECK(IsVariable(v)); stat_.emplace(std::move(v)); @@ -100,6 +108,11 @@ CoarrayRef &CoarrayRef::set_team(Expr &&v) { return *this; } +CoarrayRef &CoarrayRef::set_notify(Expr &&v) { + notify_.emplace(std::move(v)); + return *this; +} + const Symbol &CoarrayRef::GetFirstSymbol() const { return base().GetFirstSymbol(); } diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp index cb3090df25680..605264dfcbe85 100644 --- a/flang/lib/Lower/Support/Utils.cpp +++ b/flang/lib/Lower/Support/Utils.cpp @@ -84,7 +84,7 @@ class HashEvaluateExpr { x.cosubscript()) cosubs -= getHashValue(v); return getHashValue(x.base()) * 97u - cosubs + getHashValue(x.stat()) + - 257u + getHashValue(x.team()); + 257u + getHashValue(x.team()) + getHashValue(x.notify()); } static unsigned getHashValue(const Fortran::evaluate::NamedEntity &x) { if (x.IsSymbol()) @@ -343,7 +343,8 @@ class IsEqualEvaluateExpr { const Fortran::evaluate::CoarrayRef &y) { return isEqual(x.base(), y.base()) && isEqual(x.cosubscript(), y.cosubscript()) && - isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team()); + isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team()) && + isEqual(x.notify(), y.notify()); } static bool isEqual(const Fortran::evaluate::NamedEntity &x, const Fortran::evaluate::NamedEntity &y) { diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index 59fe7d813d96a..ea6a1eada2741 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1212,12 +1212,15 @@ TYPE_CONTEXT_PARSER("image selector"_en_US, // R926 image-selector-spec -> // STAT = stat-variable | TEAM = team-value | -// TEAM_NUMBER = scalar-int-expr +// TEAM_NUMBER = scalar-int-expr | +// NOTIFY = notify-variable TYPE_PARSER(construct(construct( "STAT =" >> scalar(integer(indirect(variable))))) || construct(construct("TEAM =" >> teamValue)) || construct(construct( - "TEAM_NUMBER =" >> scalarIntExpr))) + "TEAM_NUMBER =" >> scalarIntExpr)) || + construct(construct( + "NOTIFY =" >> scalar(indirect(variable))))) // R927 allocate-stmt -> // ALLOCATE ( [type-spec ::] allocation-list [, alloc-opt-list] ) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 84123030195e9..6bb14a43e7b99 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -819,6 +819,7 @@ class UnparseVisitor { Word("TEAM="); } } + void Before(const ImageSelectorSpec::Notify &) { Word("NOTIFY="); } void Unparse(const AllocateStmt &x) { // R927 Word("ALLOCATE("); Walk(std::get>(x.t), "::"); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index de407d3b1e125..9a6b3ff3cdc2c 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -855,6 +855,15 @@ void CheckHelper::CheckObjectEntity( messages_.Say( "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US, symbol.name(), component.BuildResultDesignatorName()); + } else if (IsNotifyType(derived)) { // C1612 + messages_.Say( + "Variable '%s' with NOTIFY_TYPE must be a coarray"_err_en_US, + symbol.name()); + } else if (auto component{FindNotifyPotentialComponent( // C1611 + *derived, /*ignoreCoarrays=*/true)}) { + messages_.Say( + "Variable '%s' with NOTIFY_TYPE potential component '%s' must be a coarray"_err_en_US, + symbol.name(), component.BuildResultDesignatorName()); } } } @@ -873,6 +882,10 @@ void CheckHelper::CheckObjectEntity( messages_.Say( "An INTENT(OUT) dummy argument may not be, or contain, EVENT_TYPE or LOCK_TYPE"_err_en_US); } + if (IsOrContainsNotifyComponent(symbol)) { // C1613 + messages_.Say( + "An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE"_err_en_US); + } if (IsAssumedSizeArray(symbol)) { // C834 if (type && type->IsPolymorphic()) { messages_.Say( diff --git a/flang/lib/Semantics/dump-expr.cpp b/flang/lib/Semantics/dump-expr.cpp index 66cedab94bfb4..8d354cf65b61e 100644 --- a/flang/lib/Semantics/dump-expr.cpp +++ b/flang/lib/Semantics/dump-expr.cpp @@ -23,6 +23,7 @@ void DumpEvaluateExpr::Show(const evaluate::CoarrayRef &x) { Indent("coarray ref"); Show(x.base()); Show(x.cosubscript()); + Show(x.notify()); Show(x.stat()); Show(x.team()); Outdent(); diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index c8167fd34f666..ac58dfc005f17 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1579,6 +1579,19 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::CoindexedNamedObject &x) { std::get>(x.imageSelector.t)) { common::visit( common::visitors{ + [&](const parser::ImageSelectorSpec::Notify &x) { + Analyze(x.v); + if (const auto *expr{GetExpr(context_, x.v)}) { + if (coarrayRef.notify()) { + Say("coindexed reference has multiple NOTIFY= specifiers"_err_en_US); + } else if (auto dyType{expr->GetType()}; + dyType && IsNotifyType(GetDerivedTypeSpec(*dyType))) { + coarrayRef.set_notify(Expr{*expr}); + } else { + Say("NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV"_err_en_US); + } + } + }, [&](const parser::ImageSelectorSpec::Stat &x) { Analyze(x.v); if (const auto *expr{GetExpr(context_, x.v)}) { diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 8eddd03faa962..cf1e5e7d44565 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -582,6 +582,18 @@ bool IsOrContainsEventOrLockComponent(const Symbol &original) { return false; } +bool IsOrContainsNotifyComponent(const Symbol &original) { + const Symbol &symbol{ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; + if (evaluate::IsVariable(symbol)) { + if (const DeclTypeSpec *type{symbol.GetType()}) { + if (const DerivedTypeSpec *derived{type->AsDerived()}) { + return IsNotifyType(derived) || FindNotifyPotentialComponent(*derived); + } + } + } + return false; +} + // Check this symbol suitable as a type-bound procedure - C769 bool CanBeTypeBoundProc(const Symbol &symbol) { if (IsDummy(symbol) || IsProcedurePointer(symbol)) { @@ -1489,6 +1501,32 @@ PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( return iter; } +PotentialComponentIterator::const_iterator FindNotifyPotentialComponent( + const DerivedTypeSpec &derived, bool ignoreCoarrays) { + PotentialComponentIterator potentials{derived}; + auto iter{potentials.begin()}; + for (auto end{potentials.end()}; iter != end; ++iter) { + const Symbol &component{*iter}; + if (const auto *object{component.detailsIf()}) { + if (const DeclTypeSpec *type{object->type()}) { + if (IsNotifyType(type->AsDerived())) { + if (!ignoreCoarrays) { + break; // found one + } + auto path{iter.GetComponentPath()}; + path.pop_back(); + if (std::find_if(path.begin(), path.end(), [](const Symbol &sym) { + return evaluate::IsCoarray(sym); + }) == path.end()) { + break; // found one not in a coarray + } + } + } + } + } + return iter; +} + UltimateComponentIterator::const_iterator FindAllocatableUltimateComponent( const DerivedTypeSpec &derived) { UltimateComponentIterator ultimates{derived}; diff --git a/flang/test/Semantics/coarrays02.f90 b/flang/test/Semantics/coarrays02.f90 index b16e0ccb58797..e866dd89c07ab 100644 --- a/flang/test/Semantics/coarrays02.f90 +++ b/flang/test/Semantics/coarrays02.f90 @@ -16,6 +16,8 @@ program main type(event_type) event !ERROR: Variable 'lock' with EVENT_TYPE or LOCK_TYPE must be a coarray type(lock_type) lock + !ERROR: Variable 'notify' with NOTIFY_TYPE must be a coarray + type(notify_type) notify integer :: local[*] ! ok in main end @@ -120,3 +122,18 @@ subroutine s4 !ERROR: Subscripts must appear in a coindexed reference when its base is an array print *, ta(1)%a[1] end + +subroutine s5(a, notify, res) + use iso_fortran_env + type t + type(notify_type) :: a + end type + real, intent(in) :: a[*] + type(event_type), intent(in) :: notify[*] + !ERROR: An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE + type(notify_type), intent(out) :: res[*] + !ERROR: Variable 'bad' with NOTIFY_TYPE potential component '%a' must be a coarray + type(t) :: bad + !ERROR: NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV + print *, a[1, NOTIFY=notify] +end diff --git a/flang/test/Semantics/notifywait03.f90 b/flang/test/Semantics/notifywait03.f90 index 0fc56f66ad32d..a336a7a67669a 100644 --- a/flang/test/Semantics/notifywait03.f90 +++ b/flang/test/Semantics/notifywait03.f90 @@ -10,6 +10,7 @@ program test_notify_wait implicit none ! notify_type variables must be coarrays + !ERROR: Variable 'non_coarray' with NOTIFY_TYPE must be a coarray type(notify_type) :: non_coarray type(notify_type) :: notify_var[*], notify_array(2)[*] diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h index 6697ce5b03851..e1b12e3010fe9 100644 --- a/libc/include/llvm-libc-macros/math-macros.h +++ b/libc/include/llvm-libc-macros/math-macros.h @@ -42,14 +42,37 @@ #define FP_LLOGBNAN LONG_MAX #endif -#if defined(__NVPTX__) || defined(__AMDGPU__) || defined(__FAST_MATH__) -#define math_errhandling 0 -#elif defined(__NO_MATH_ERRNO__) -#define math_errhandling (MATH_ERREXCEPT) +// Math error handling. Target support is assumed to be existent unless +// explicitly disabled. +#if defined(__NVPTX__) || defined(__AMDGPU__) || defined(__FAST_MATH__) || \ + defined(__NO_MATH_ERRNO__) +#define __LIBC_SUPPORTS_MATH_ERRNO 0 +#else +#define __LIBC_SUPPORTS_MATH_ERRNO 1 +#endif + +#if defined(__FAST_MATH__) || \ + ((defined(__arm__) || defined(_M_ARM) || defined(__thumb__) || \ + defined(__aarch64__) || defined(_M_ARM64)) && \ + !defined(__ARM_FP)) +#define __LIBC_SUPPORTS_MATH_ERREXCEPT 0 #else +#define __LIBC_SUPPORTS_MATH_ERREXCEPT 1 +#endif + +#if __LIBC_SUPPORTS_MATH_ERRNO && __LIBC_SUPPORTS_MATH_ERREXCEPT #define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT) +#elif __LIBC_SUPPORTS_MATH_ERRNO +#define math_errhandling (MATH_ERRNO) +#elif __LIBC_SUPPORTS_MATH_ERREXCEPT +#define math_errhandling (MATH_ERREXCEPT) +#else +#define math_errhandling 0 #endif +#undef __LIBC_SUPPORTS_MATH_ERRNO +#undef __LIBC_SUPPORTS_MATH_ERREXCEPT + // POSIX math constants // https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/math.h.html #define M_E (__extension__ 0x1.5bf0a8b145769p1) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index b7af751ec3f27..96874702b1fdf 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -161,6 +161,7 @@ add_header_library( HDRS wctype_utils.h DEPENDS + libc.hdr.types.wchar_t libc.hdr.types.wint_t ) diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h index be0f25330af9e..61b7a0aeb5b67 100644 --- a/libc/src/__support/ctype_utils.h +++ b/libc/src/__support/ctype_utils.h @@ -27,7 +27,7 @@ namespace internal { // as well as a way to support non-ASCII character encodings. // Similarly, do not change these functions to use case ranges. e.g. -// bool islower(int ch) { +// bool islower(char ch) { // switch(ch) { // case 'a'...'z': // return true; @@ -37,7 +37,7 @@ namespace internal { // EBCDIC. Technically we could use some smaller ranges, but that's even harder // to read. -LIBC_INLINE static constexpr bool islower(int ch) { +LIBC_INLINE static constexpr bool islower(char ch) { switch (ch) { case 'a': case 'b': @@ -71,7 +71,7 @@ LIBC_INLINE static constexpr bool islower(int ch) { } } -LIBC_INLINE static constexpr bool isupper(int ch) { +LIBC_INLINE static constexpr bool isupper(char ch) { switch (ch) { case 'A': case 'B': @@ -105,7 +105,7 @@ LIBC_INLINE static constexpr bool isupper(int ch) { } } -LIBC_INLINE static constexpr bool isdigit(int ch) { +LIBC_INLINE static constexpr bool isdigit(char ch) { switch (ch) { case '0': case '1': @@ -123,7 +123,7 @@ LIBC_INLINE static constexpr bool isdigit(int ch) { } } -LIBC_INLINE static constexpr int tolower(int ch) { +LIBC_INLINE static constexpr char tolower(char ch) { switch (ch) { case 'A': return 'a'; @@ -182,7 +182,7 @@ LIBC_INLINE static constexpr int tolower(int ch) { } } -LIBC_INLINE static constexpr int toupper(int ch) { +LIBC_INLINE static constexpr char toupper(char ch) { switch (ch) { case 'a': return 'A'; @@ -241,7 +241,7 @@ LIBC_INLINE static constexpr int toupper(int ch) { } } -LIBC_INLINE static constexpr bool isalpha(int ch) { +LIBC_INLINE static constexpr bool isalpha(char ch) { switch (ch) { case 'a': case 'b': @@ -301,7 +301,7 @@ LIBC_INLINE static constexpr bool isalpha(int ch) { } } -LIBC_INLINE static constexpr bool isalnum(int ch) { +LIBC_INLINE static constexpr bool isalnum(char ch) { switch (ch) { case 'a': case 'b': @@ -371,7 +371,7 @@ LIBC_INLINE static constexpr bool isalnum(int ch) { } } -LIBC_INLINE static constexpr int b36_char_to_int(int ch) { +LIBC_INLINE static constexpr int b36_char_to_int(char ch) { switch (ch) { case '0': return 0; @@ -476,7 +476,7 @@ LIBC_INLINE static constexpr int b36_char_to_int(int ch) { } } -LIBC_INLINE static constexpr int int_to_b36_char(int num) { +LIBC_INLINE static constexpr char int_to_b36_char(int num) { // Can't actually use LIBC_ASSERT here because it depends on integer_to_string // which depends on this. @@ -559,7 +559,7 @@ LIBC_INLINE static constexpr int int_to_b36_char(int num) { } } -LIBC_INLINE static constexpr bool isspace(int ch) { +LIBC_INLINE static constexpr bool isspace(char ch) { switch (ch) { case ' ': case '\t': @@ -574,7 +574,7 @@ LIBC_INLINE static constexpr bool isspace(int ch) { } // not yet encoding independent. -LIBC_INLINE static constexpr bool isgraph(int ch) { +LIBC_INLINE static constexpr bool isgraph(char ch) { return 0x20 < ch && ch < 0x7f; } diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 29449bd739730..5e7369de00962 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -378,9 +378,8 @@ template class IntegerToString { using UNSIGNED_T = make_integral_or_big_int_unsigned_t; LIBC_INLINE static char digit_char(uint8_t digit) { - const int result = internal::int_to_b36_char(digit); - return static_cast(Fmt::IS_UPPERCASE ? internal::toupper(result) - : result); + const char result = internal::int_to_b36_char(digit); + return Fmt::IS_UPPERCASE ? internal::toupper(result) : result; } LIBC_INLINE static void diff --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h index 2ae5ec93b2a63..60b6afb928475 100644 --- a/libc/src/__support/wctype_utils.h +++ b/libc/src/__support/wctype_utils.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H #define LLVM_LIBC_SRC___SUPPORT_WCTYPE_UTILS_H +#include "hdr/types/wchar_t.h" #include "hdr/types/wint_t.h" #include "src/__support/CPP/optional.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE @@ -30,7 +31,7 @@ namespace internal { // Similarly, do not change these fumarks to show your new solution is faster, // as well as a way to support non-Anctions to use case ranges. e.g. -// bool iswlower(wint_t ch) { +// bool iswlower(wchar_t ch) { // switch(ch) { // case L'a'...L'z': // return true; @@ -40,7 +41,7 @@ namespace internal { // EBCDIC. Technically we could use some smaller ranges, but that's even harder // to read. -LIBC_INLINE static constexpr bool iswlower(wint_t wch) { +LIBC_INLINE static constexpr bool iswlower(wchar_t wch) { switch (wch) { case L'a': case L'b': @@ -74,7 +75,7 @@ LIBC_INLINE static constexpr bool iswlower(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswupper(wint_t wch) { +LIBC_INLINE static constexpr bool iswupper(wchar_t wch) { switch (wch) { case L'A': case L'B': @@ -108,7 +109,7 @@ LIBC_INLINE static constexpr bool iswupper(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswdigit(wint_t wch) { +LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) { switch (wch) { case L'0': case L'1': @@ -126,7 +127,7 @@ LIBC_INLINE static constexpr bool iswdigit(wint_t wch) { } } -LIBC_INLINE static constexpr wint_t towlower(wint_t wch) { +LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) { switch (wch) { case L'A': return L'a'; @@ -185,7 +186,7 @@ LIBC_INLINE static constexpr wint_t towlower(wint_t wch) { } } -LIBC_INLINE static constexpr wint_t towupper(wint_t wch) { +LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) { switch (wch) { case L'a': return L'A'; @@ -244,7 +245,7 @@ LIBC_INLINE static constexpr wint_t towupper(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswalpha(wint_t wch) { +LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) { switch (wch) { case L'a': case L'b': @@ -304,7 +305,7 @@ LIBC_INLINE static constexpr bool iswalpha(wint_t wch) { } } -LIBC_INLINE static constexpr bool iswalnum(wint_t wch) { +LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) { switch (wch) { case L'a': case L'b': @@ -374,7 +375,7 @@ LIBC_INLINE static constexpr bool iswalnum(wint_t wch) { } } -LIBC_INLINE static constexpr int b36_wchar_to_int(wint_t wch) { +LIBC_INLINE static constexpr int b36_wchar_to_int(wchar_t wch) { switch (wch) { case L'0': return 0; @@ -479,7 +480,7 @@ LIBC_INLINE static constexpr int b36_wchar_to_int(wint_t wch) { } } -LIBC_INLINE static constexpr wint_t int_to_b36_wchar(int num) { +LIBC_INLINE static constexpr wchar_t int_to_b36_wchar(int num) { // Can't actually use LIBC_ASSERT here because it depends on integer_to_string // which depends on this. @@ -562,7 +563,7 @@ LIBC_INLINE static constexpr wint_t int_to_b36_wchar(int num) { } } -LIBC_INLINE static constexpr bool iswspace(wint_t wch) { +LIBC_INLINE static constexpr bool iswspace(wchar_t wch) { switch (wch) { case L' ': case L'\t': diff --git a/libc/src/ctype/CMakeLists.txt b/libc/src/ctype/CMakeLists.txt index 8830c1bccf9ea..68e982bd4529e 100644 --- a/libc/src/ctype/CMakeLists.txt +++ b/libc/src/ctype/CMakeLists.txt @@ -6,6 +6,7 @@ add_entrypoint_object( isalnum.h DEPENDS libc.include.ctype + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -16,6 +17,7 @@ add_entrypoint_object( HDRS isalpha.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -50,6 +52,7 @@ add_entrypoint_object( HDRS isdigit.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -60,6 +63,7 @@ add_entrypoint_object( HDRS isgraph.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -70,6 +74,7 @@ add_entrypoint_object( HDRS islower.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -88,6 +93,7 @@ add_entrypoint_object( HDRS ispunct.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -97,6 +103,9 @@ add_entrypoint_object( isspace.cpp HDRS isspace.h + DEPENDS + libc.src.__support.CPP.limits + libc.src.__support.ctype_utils ) add_entrypoint_object( @@ -106,6 +115,7 @@ add_entrypoint_object( HDRS isupper.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -116,6 +126,7 @@ add_entrypoint_object( HDRS isxdigit.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -126,6 +137,7 @@ add_entrypoint_object( HDRS tolower.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -144,6 +156,7 @@ add_entrypoint_object( HDRS toupper.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils ) @@ -160,6 +173,7 @@ add_entrypoint_object( isalnum_l.h DEPENDS libc.include.ctype + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -171,6 +185,7 @@ add_entrypoint_object( HDRS isalpha_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -202,6 +217,7 @@ add_entrypoint_object( HDRS isdigit_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -224,6 +240,7 @@ add_entrypoint_object( HDRS islower_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -257,6 +274,8 @@ add_entrypoint_object( isspace_l.h DEPENDS libc.hdr.types.locale_t + libc.src.__support.CPP.limits + libc.src.__support.ctype_utils ) add_entrypoint_object( @@ -266,6 +285,7 @@ add_entrypoint_object( HDRS isupper_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -277,6 +297,7 @@ add_entrypoint_object( HDRS isxdigit_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -288,6 +309,7 @@ add_entrypoint_object( HDRS tolower_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) @@ -299,6 +321,7 @@ add_entrypoint_object( HDRS toupper_l.h DEPENDS + libc.src.__support.CPP.limits libc.src.__support.ctype_utils libc.hdr.types.locale_t ) diff --git a/libc/src/ctype/isalnum.cpp b/libc/src/ctype/isalnum.cpp index 54a3e35748879..102b5e79e4a18 100644 --- a/libc/src/ctype/isalnum.cpp +++ b/libc/src/ctype/isalnum.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isalnum.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalnum, (int c)) { - return static_cast(internal::isalnum(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalnum(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isalnum_l.cpp b/libc/src/ctype/isalnum_l.cpp index 671d9b75c4c33..173e1c174121e 100644 --- a/libc/src/ctype/isalnum_l.cpp +++ b/libc/src/ctype/isalnum_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isalnum_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalnum_l, (int c, locale_t)) { - return static_cast(internal::isalnum(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalnum(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isalpha.cpp b/libc/src/ctype/isalpha.cpp index 78b26f6a486ea..7c874bf373866 100644 --- a/libc/src/ctype/isalpha.cpp +++ b/libc/src/ctype/isalpha.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isalpha.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalpha, (int c)) { - return static_cast(internal::isalpha(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalpha(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isalpha_l.cpp b/libc/src/ctype/isalpha_l.cpp index 0619d979bedf2..982bcc569faaf 100644 --- a/libc/src/ctype/isalpha_l.cpp +++ b/libc/src/ctype/isalpha_l.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isalpha_l.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isalpha_l, (int c, locale_t)) { - return static_cast(internal::isalpha(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isalpha(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isdigit.cpp b/libc/src/ctype/isdigit.cpp index 1f711943861f8..43553c794a2f3 100644 --- a/libc/src/ctype/isdigit.cpp +++ b/libc/src/ctype/isdigit.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isdigit.h" + +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -14,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isdigit, (int c)) { - return static_cast(internal::isdigit(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isdigit(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isdigit_l.cpp b/libc/src/ctype/isdigit_l.cpp index ca981362bfe83..40b5618906dac 100644 --- a/libc/src/ctype/isdigit_l.cpp +++ b/libc/src/ctype/isdigit_l.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isdigit_l.h" + +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -14,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isdigit_l, (int c, locale_t)) { - return static_cast(internal::isdigit(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isdigit(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isgraph.cpp b/libc/src/ctype/isgraph.cpp index 74bb2e75d138e..b9308ecb7367c 100644 --- a/libc/src/ctype/isgraph.cpp +++ b/libc/src/ctype/isgraph.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isgraph.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isgraph, (int c)) { - return static_cast(internal::isgraph(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isgraph(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isgraph_l.cpp b/libc/src/ctype/isgraph_l.cpp index cbef6df148aed..dddcb9be4f80c 100644 --- a/libc/src/ctype/isgraph_l.cpp +++ b/libc/src/ctype/isgraph_l.cpp @@ -8,6 +8,7 @@ #include "src/ctype/isgraph_l.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isgraph_l, (int c, locale_t)) { - return static_cast(internal::isgraph(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isgraph(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/islower.cpp b/libc/src/ctype/islower.cpp index 831aad32d3a22..920bfc1cc1a59 100644 --- a/libc/src/ctype/islower.cpp +++ b/libc/src/ctype/islower.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/islower.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, islower, (int c)) { - return static_cast(internal::islower(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::islower(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/islower_l.cpp b/libc/src/ctype/islower_l.cpp index b9be6acc81c99..da97026dc59a7 100644 --- a/libc/src/ctype/islower_l.cpp +++ b/libc/src/ctype/islower_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/islower_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, islower_l, (int c, locale_t)) { - return static_cast(internal::islower(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::islower(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/ispunct.cpp b/libc/src/ctype/ispunct.cpp index 0635294220b9c..4950036e9b81f 100644 --- a/libc/src/ctype/ispunct.cpp +++ b/libc/src/ctype/ispunct.cpp @@ -8,6 +8,7 @@ #include "src/ctype/ispunct.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, ispunct, (int c)) { - const unsigned ch = static_cast(c); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(!internal::isalnum(ch) && internal::isgraph(ch)); } diff --git a/libc/src/ctype/ispunct_l.cpp b/libc/src/ctype/ispunct_l.cpp index e825fbe2001b0..79cd47b6a214d 100644 --- a/libc/src/ctype/ispunct_l.cpp +++ b/libc/src/ctype/ispunct_l.cpp @@ -8,6 +8,7 @@ #include "src/ctype/ispunct_l.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" @@ -15,7 +16,9 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, ispunct_l, (int c, locale_t)) { - const unsigned ch = static_cast(c); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(!internal::isalnum(ch) && internal::isgraph(ch)); } diff --git a/libc/src/ctype/isspace.cpp b/libc/src/ctype/isspace.cpp index 005bf460fc103..998dbf28f51d0 100644 --- a/libc/src/ctype/isspace.cpp +++ b/libc/src/ctype/isspace.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isspace.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isspace, (int c)) { - return static_cast(internal::isspace(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isspace(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isspace_l.cpp b/libc/src/ctype/isspace_l.cpp index 5c46dd6805126..e40765326b35e 100644 --- a/libc/src/ctype/isspace_l.cpp +++ b/libc/src/ctype/isspace_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isspace_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isspace_l, (int c, locale_t)) { - return static_cast(internal::isspace(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isspace(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isupper.cpp b/libc/src/ctype/isupper.cpp index 965fa336b28b4..c5c3dbd5d7d4a 100644 --- a/libc/src/ctype/isupper.cpp +++ b/libc/src/ctype/isupper.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isupper.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isupper, (int c)) { - return static_cast(internal::isupper(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isupper(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isupper_l.cpp b/libc/src/ctype/isupper_l.cpp index 358990261d603..44ed9dab90a16 100644 --- a/libc/src/ctype/isupper_l.cpp +++ b/libc/src/ctype/isupper_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isupper_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isupper_l, (int c, locale_t)) { - return static_cast(internal::isupper(static_cast(c))); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + return static_cast(internal::isupper(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isxdigit.cpp b/libc/src/ctype/isxdigit.cpp index 81f645c6f49fc..1b2e71769b3f8 100644 --- a/libc/src/ctype/isxdigit.cpp +++ b/libc/src/ctype/isxdigit.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isxdigit.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) { - const unsigned ch = static_cast(c); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(internal::isalnum(ch) && internal::b36_char_to_int(ch) < 16); } diff --git a/libc/src/ctype/isxdigit_l.cpp b/libc/src/ctype/isxdigit_l.cpp index eddfd20a2da3b..e6150473b0043 100644 --- a/libc/src/ctype/isxdigit_l.cpp +++ b/libc/src/ctype/isxdigit_l.cpp @@ -7,15 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isxdigit_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) { - const unsigned ch = static_cast(c); + if (c < 0 || c > cpp::numeric_limits::max()) + return 0; + const char ch = static_cast(c); return static_cast(internal::isalnum(ch) && internal::b36_char_to_int(ch) < 16); } diff --git a/libc/src/ctype/tolower.cpp b/libc/src/ctype/tolower.cpp index 3ecad7bc5d5d5..b45c5f2688a61 100644 --- a/libc/src/ctype/tolower.cpp +++ b/libc/src/ctype/tolower.cpp @@ -7,13 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/tolower.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, tolower, (int c)) { return internal::tolower(c); } +LLVM_LIBC_FUNCTION(int, tolower, (int c)) { + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::tolower(static_cast(c))); +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/tolower_l.cpp b/libc/src/ctype/tolower_l.cpp index 7ccf31617e592..049e46aea13c0 100644 --- a/libc/src/ctype/tolower_l.cpp +++ b/libc/src/ctype/tolower_l.cpp @@ -7,15 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/tolower_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tolower_l, (int c, locale_t)) { - return internal::tolower(c); + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::tolower(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/toupper.cpp b/libc/src/ctype/toupper.cpp index 1e1e8fc400711..0e387238ce3b6 100644 --- a/libc/src/ctype/toupper.cpp +++ b/libc/src/ctype/toupper.cpp @@ -7,13 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/toupper.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); } +LLVM_LIBC_FUNCTION(int, toupper, (int c)) { + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::toupper(static_cast(c))); +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/toupper_l.cpp b/libc/src/ctype/toupper_l.cpp index a435ca1ab5d41..d1dff262c9377 100644 --- a/libc/src/ctype/toupper_l.cpp +++ b/libc/src/ctype/toupper_l.cpp @@ -7,15 +7,20 @@ //===----------------------------------------------------------------------===// #include "src/ctype/toupper_l.h" -#include "src/__support/ctype_utils.h" +#include "src/__support/CPP/limits.h" #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) { - return internal::toupper(c); + if (c < cpp::numeric_limits::min() || + c > cpp::numeric_limits::max()) { + return c; + } + return static_cast(internal::toupper(static_cast(c))); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/float_dec_converter_limited.h b/libc/src/stdio/printf_core/float_dec_converter_limited.h index 9cdc13573d320..0f85d0a8d26b4 100644 --- a/libc/src/stdio/printf_core/float_dec_converter_limited.h +++ b/libc/src/stdio/printf_core/float_dec_converter_limited.h @@ -363,8 +363,8 @@ DigitsOutput decimal_digits(DigitsInput input, int precision, bool e_mode) { // we made it from and doing the decimal conversion all over again.) for (size_t i = output.ndigits; i-- > 0;) { if (output.digits[i] != '9') { - output.digits[i] = static_cast(internal::int_to_b36_char( - internal::b36_char_to_int(output.digits[i]) + 1)); + output.digits[i] = internal::int_to_b36_char( + internal::b36_char_to_int(output.digits[i]) + 1); break; } else { output.digits[i] = '0'; diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index 16592e7bac932..9b57f1d803e74 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -137,9 +137,9 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t first_non_zero = 1; for (; mant_cur > 0; --mant_cur, mantissa >>= 4) { char mant_mod_16 = static_cast(mantissa % 16); - char new_digit = static_cast(internal::int_to_b36_char(mant_mod_16)); + char new_digit = internal::int_to_b36_char(mant_mod_16); if (internal::isupper(to_conv.conv_name)) - new_digit = static_cast(internal::toupper(new_digit)); + new_digit = internal::toupper(new_digit); mant_buffer[mant_cur - 1] = new_digit; if (new_digit != '0' && first_non_zero < mant_cur) first_non_zero = mant_cur; @@ -167,8 +167,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t exp_cur = EXP_LEN; for (; exponent > 0; --exp_cur, exponent /= 10) { - exp_buffer[exp_cur - 1] = - static_cast(internal::int_to_b36_char(exponent % 10)); + exp_buffer[exp_cur - 1] = internal::int_to_b36_char(exponent % 10); } if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0. exp_buffer[EXP_LEN - 1] = '0'; diff --git a/libc/src/stdlib/l64a.cpp b/libc/src/stdlib/l64a.cpp index d59e65e7dc4c2..d8fe8ef86bf7d 100644 --- a/libc/src/stdlib/l64a.cpp +++ b/libc/src/stdlib/l64a.cpp @@ -32,15 +32,13 @@ constexpr static char b64_int_to_char(uint32_t num) { if (num == 1) return '/'; if (num < 38) - return static_cast( - internal::toupper(internal::int_to_b36_char(num - 2))); + return internal::toupper(internal::int_to_b36_char(num - 2)); // this tolower is technically unnecessary, but it provides safety if we // change the default behavior of int_to_b36_char. Also the compiler // completely elides it so there's no performance penalty, see: // https://godbolt.org/z/o5ennv7fc - return static_cast( - internal::tolower(internal::int_to_b36_char(num - 2 - 26))); + return internal::tolower(internal::int_to_b36_char(num - 2 - 26)); } // This function takes a long and converts the low 32 bits of it into at most 6 diff --git a/libc/src/string/strcasestr.cpp b/libc/src/string/strcasestr.cpp index de8e4bec7fe0b..575d6bed16d11 100644 --- a/libc/src/string/strcasestr.cpp +++ b/libc/src/string/strcasestr.cpp @@ -21,8 +21,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(char *, strcasestr, (const char *haystack, const char *needle)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; LIBC_CRASH_ON_NULLPTR(haystack); diff --git a/libc/src/strings/strcasecmp.cpp b/libc/src/strings/strcasecmp.cpp index 4bbe2909df1e2..4518647deabe4 100644 --- a/libc/src/strings/strcasecmp.cpp +++ b/libc/src/strings/strcasecmp.cpp @@ -17,8 +17,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strcasecmp, (const char *left, const char *right)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strcmp(left, right, case_cmp); } diff --git a/libc/src/strings/strcasecmp_l.cpp b/libc/src/strings/strcasecmp_l.cpp index 95117cb27a564..d77f95637a396 100644 --- a/libc/src/strings/strcasecmp_l.cpp +++ b/libc/src/strings/strcasecmp_l.cpp @@ -18,8 +18,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strcasecmp_l, (const char *left, const char *right, locale_t)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strcmp(left, right, case_cmp); } diff --git a/libc/src/strings/strncasecmp.cpp b/libc/src/strings/strncasecmp.cpp index 9c2f0ab131269..a5926495a3e22 100644 --- a/libc/src/strings/strncasecmp.cpp +++ b/libc/src/strings/strncasecmp.cpp @@ -18,8 +18,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strncasecmp, (const char *left, const char *right, size_t n)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strncmp(left, right, n, case_cmp); } diff --git a/libc/src/strings/strncasecmp_l.cpp b/libc/src/strings/strncasecmp_l.cpp index 91ac7e5e89107..a828f609fd9e8 100644 --- a/libc/src/strings/strncasecmp_l.cpp +++ b/libc/src/strings/strncasecmp_l.cpp @@ -18,8 +18,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, strncasecmp_l, (const char *left, const char *right, size_t n, locale_t)) { auto case_cmp = [](char a, char b) { - return LIBC_NAMESPACE::internal::tolower(a) - - LIBC_NAMESPACE::internal::tolower(b); + return static_cast(LIBC_NAMESPACE::internal::tolower(a)) - + static_cast(LIBC_NAMESPACE::internal::tolower(b)); }; return inline_strncmp(left, right, n, case_cmp); } diff --git a/libc/src/wctype/iswalpha.cpp b/libc/src/wctype/iswalpha.cpp index 09f55d391dbff..e151363b88d0b 100644 --- a/libc/src/wctype/iswalpha.cpp +++ b/libc/src/wctype/iswalpha.cpp @@ -14,6 +14,8 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) { return internal::iswalpha(c); } +LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) { + return internal::iswalpha(static_cast(c)); +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/IntegrationTest/CMakeLists.txt b/libc/test/IntegrationTest/CMakeLists.txt index 235e9fe2f55ee..d0752ea178429 100644 --- a/libc/test/IntegrationTest/CMakeLists.txt +++ b/libc/test/IntegrationTest/CMakeLists.txt @@ -14,5 +14,6 @@ add_object_library( libc.hdr.stdint_proxy libc.src.__support.OSUtil.osutil libc.src.__support.CPP.atomic + libc.src.__support.macros.properties.architectures ${arch_specific_deps} ) diff --git a/libc/test/IntegrationTest/test.h b/libc/test/IntegrationTest/test.h index 4a03f7aa6318b..9f5a3dfb3583c 100644 --- a/libc/test/IntegrationTest/test.h +++ b/libc/test/IntegrationTest/test.h @@ -11,6 +11,7 @@ #include "src/__support/OSUtil/exit.h" #include "src/__support/OSUtil/io.h" +#include "src/__support/macros/properties/architectures.h" #define __AS_STRING(val) #val #define __CHECK_TRUE(file, line, val, should_exit) \ @@ -68,9 +69,15 @@ //////////////////////////////////////////////////////////////////////////////// // Errno checks. +#ifdef LIBC_TARGET_ARCH_IS_GPU +#define ASSERT_ERRNO_EQ(VAL) +#define ASSERT_ERRNO_SUCCESS() +#define ASSERT_ERRNO_FAILURE() +#else #define ASSERT_ERRNO_EQ(VAL) ASSERT_EQ(VAL, static_cast(errno)) #define ASSERT_ERRNO_SUCCESS() ASSERT_EQ(0, static_cast(errno)) #define ASSERT_ERRNO_FAILURE() ASSERT_NE(0, static_cast(errno)) +#endif // Integration tests are compiled with -ffreestanding which stops treating // the main function as a non-overloadable special function. Hence, we use a diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 31d1e9dce8204..3197b3d7fd01b 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -204,5 +204,6 @@ add_header_library( ErrnoCheckingTest.h DEPENDS libc.src.__support.common + libc.src.__support.macros.properties.architectures libc.src.errno.errno ) diff --git a/libc/test/UnitTest/ErrnoCheckingTest.h b/libc/test/UnitTest/ErrnoCheckingTest.h index 5b1bc9441d830..111d812c58612 100644 --- a/libc/test/UnitTest/ErrnoCheckingTest.h +++ b/libc/test/UnitTest/ErrnoCheckingTest.h @@ -11,11 +11,17 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/architectures.h" #include "test/UnitTest/Test.h" // Define macro to validate the value stored in the errno and restore it // to zero. +#ifdef LIBC_TARGET_ARCH_IS_GPU +#define ASSERT_ERRNO_EQ(VAL) +#define ASSERT_ERRNO_SUCCESS() +#define ASSERT_ERRNO_FAILURE() +#else #define ASSERT_ERRNO_EQ(VAL) \ do { \ ASSERT_EQ(VAL, static_cast(libc_errno)); \ @@ -27,6 +33,7 @@ ASSERT_NE(0, static_cast(libc_errno)); \ libc_errno = 0; \ } while (0) +#endif namespace LIBC_NAMESPACE_DECL { namespace testing { diff --git a/libc/test/UnitTest/MemoryMatcher.cpp b/libc/test/UnitTest/MemoryMatcher.cpp index 6e375768e9333..405f226798f7a 100644 --- a/libc/test/UnitTest/MemoryMatcher.cpp +++ b/libc/test/UnitTest/MemoryMatcher.cpp @@ -41,8 +41,8 @@ bool MemoryMatcher::match(MemoryView actualValue) { static void display(char C) { const auto print = [](unsigned char i) { - tlog << static_cast(LIBC_NAMESPACE::internal::toupper( - LIBC_NAMESPACE::internal::int_to_b36_char(i))); + tlog << LIBC_NAMESPACE::internal::toupper( + LIBC_NAMESPACE::internal::int_to_b36_char(i)); }; print(static_cast(C) / 16); print(static_cast(C) & 15); diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp index f877171abb9a3..e4e5f5cefd954 100644 --- a/libc/test/src/ctype/islower_test.cpp +++ b/libc/test/src/ctype/islower_test.cpp @@ -40,7 +40,7 @@ TEST(LlvmLibcIsLower, SimpleTest) { } TEST(LlvmLibcIsLower, DefaultLocale) { - // Loops through all characters, verifying that numbers and letters + // Loops through all characters, verifying that only lowercase letters // return non-zero integer and everything else returns a zero. for (int ch = -255; ch < 255; ++ch) { if (in_span(ch, LOWER_ARRAY)) diff --git a/libc/test/src/stdlib/StrtolTest.h b/libc/test/src/stdlib/StrtolTest.h index 03f0a6539c785..3a7da1fa85ac7 100644 --- a/libc/test/src/stdlib/StrtolTest.h +++ b/libc/test/src/stdlib/StrtolTest.h @@ -177,8 +177,8 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { char small_string[4] = {'\0', '\0', '\0', '\0'}; for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); if (first_digit < base) { ASSERT_EQ(func(small_string, nullptr, base), static_cast(first_digit)); @@ -192,11 +192,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); if (first_digit < base && second_digit < base) { ASSERT_EQ( func(small_string, nullptr, base), @@ -216,14 +216,14 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); for (int third_digit = 0; third_digit <= limit; ++third_digit) { - small_string[2] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_char(third_digit)); + small_string[2] = + LIBC_NAMESPACE::internal::int_to_b36_char(third_digit); if (first_digit < base && second_digit < base && third_digit < base) { diff --git a/libc/test/src/wchar/WcstolTest.h b/libc/test/src/wchar/WcstolTest.h index 4d5b752e62238..cadf9e0c42b90 100644 --- a/libc/test/src/wchar/WcstolTest.h +++ b/libc/test/src/wchar/WcstolTest.h @@ -178,8 +178,8 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { wchar_t small_string[4] = {L'\0', L'\0', L'\0', L'\0'}; for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit); if (first_digit < base) { ASSERT_EQ(func(small_string, nullptr, base), static_cast(first_digit)); @@ -193,11 +193,11 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit); if (first_digit < base && second_digit < base) { ASSERT_EQ( func(small_string, nullptr, base), @@ -217,14 +217,14 @@ struct WcstoTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit)); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit)); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(second_digit); for (int third_digit = 0; third_digit <= limit; ++third_digit) { - small_string[2] = static_cast( - LIBC_NAMESPACE::internal::int_to_b36_wchar(third_digit)); + small_string[2] = + LIBC_NAMESPACE::internal::int_to_b36_wchar(third_digit); if (first_digit < base && second_digit < base && third_digit < base) { diff --git a/libcxxabi/src/demangle/cp-to-llvm.sh b/libcxxabi/src/demangle/cp-to-llvm.sh index f773dff9f0a8b..9c1db6fec29a6 100755 --- a/libcxxabi/src/demangle/cp-to-llvm.sh +++ b/libcxxabi/src/demangle/cp-to-llvm.sh @@ -42,6 +42,7 @@ copy_files() { chmod -w $dst/README.txt for I in $hdrs ; do + echo "Copying ${src}/$I to ${dst}/$I" rm -f $dst/$I dash=$(echo "$I---------------------------" | cut -c -27 |\ sed 's|[^-]*||') @@ -53,6 +54,6 @@ copy_files() { } if [[ $ANSWER =~ ^[Yy]$ ]]; then - copy_files . $LLVM_DEMANGLE_DIR $HDRS - copy_files ../../test $LLVM_TESTING_DIR $TEST_HDRS + copy_files . $LLVM_DEMANGLE_DIR "${HDRS}" + copy_files ../../test $LLVM_TESTING_DIR "${TEST_HDRS}" fi diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 01b5546fee00d..0736e6ba132c8 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -62,11 +62,16 @@ if (LLDB_ENABLE_PYTHON) set(cachestring_LLDB_PYTHON_EXT_SUFFIX "Filename extension for native code python modules") + if (LLDB_ENABLE_PYTHON_LIMITED_API) + set(stable_abi "--stable-abi") + endif() + foreach(var LLDB_PYTHON_RELATIVE_PATH LLDB_PYTHON_EXE_RELATIVE_PATH LLDB_PYTHON_EXT_SUFFIX) if(NOT DEFINED ${var} AND NOT CMAKE_CROSSCOMPILING) execute_process( COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/bindings/python/get-python-config.py + ${stable_abi} ${var} OUTPUT_VARIABLE value OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/lldb/bindings/python/get-python-config.py b/lldb/bindings/python/get-python-config.py index ae84cbb1215a9..bf8cc48b013e1 100755 --- a/lldb/bindings/python/get-python-config.py +++ b/lldb/bindings/python/get-python-config.py @@ -18,6 +18,9 @@ def relpath_nodots(path, base): def main(): parser = argparse.ArgumentParser(description="extract cmake variables from python") parser.add_argument("variable_name") + parser.add_argument( + "--stable-abi", action="store_true", help="Target the Stable C ABI" + ) args = parser.parse_args() if args.variable_name == "LLDB_PYTHON_RELATIVE_PATH": # LLDB_PYTHON_RELATIVE_PATH is the relative path from lldb's prefix @@ -68,7 +71,10 @@ def main(): print("sys.prefix:", sys.prefix, file=sys.stderr) sys.exit(1) elif args.variable_name == "LLDB_PYTHON_EXT_SUFFIX": - print(sysconfig.get_config_var("EXT_SUFFIX")) + if args.stable_abi: + print(".abi3%s" % sysconfig.get_config_var("SHLIB_SUFFIX")) + else: + print(sysconfig.get_config_var("EXT_SUFFIX")) else: parser.error(f"unknown variable {args.variable_name}") diff --git a/lldb/include/lldb/lldb-private-types.h b/lldb/include/lldb/lldb-private-types.h index b82a2b8aa0574..185467e91bf62 100644 --- a/lldb/include/lldb/lldb-private-types.h +++ b/lldb/include/lldb/lldb-private-types.h @@ -102,13 +102,18 @@ struct RegisterSet { /// A type-erased pair of llvm::dwarf::SourceLanguageName and version. struct SourceLanguage { SourceLanguage() = default; - SourceLanguage(lldb::LanguageType language_type); + explicit SourceLanguage(lldb::LanguageType language_type); + SourceLanguage(uint16_t name, uint32_t version) : name(name), version(version) {} - SourceLanguage(std::optional> name_vers) + + explicit SourceLanguage( + std::optional> name_vers) : name(name_vers ? name_vers->first : 0), version(name_vers ? name_vers->second : 0) {} - operator bool() const { return name > 0; } + + explicit operator bool() const { return name > 0; } + lldb::LanguageType AsLanguageType() const; llvm::StringRef GetDescription() const; bool IsC() const; diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index f25209c15e007..25285beb7ffd5 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -251,7 +251,7 @@ bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx, } m_user_expression_sp.reset(GetTarget().GetUserExpressionForLanguage( - condition.GetText(), llvm::StringRef(), language, + condition.GetText(), llvm::StringRef(), SourceLanguage{language}, Expression::eResultTypeAny, EvaluateExpressionOptions(), nullptr, error)); if (error.Fail()) { diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 0d9eb45732161..40f00c90bbbfb 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -95,9 +95,9 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command, StackFrame *frame = m_exe_ctx.GetFramePtr(); // Either the language was explicitly specified, or we check the frame. - lldb::LanguageType language = m_expr_options.language; - if (language == lldb::eLanguageTypeUnknown && frame) - language = frame->GuessLanguage().AsLanguageType(); + SourceLanguage language{m_expr_options.language}; + if (!language && frame) + language = frame->GuessLanguage(); // Add a hint if object description was requested, but no description // function was implemented. @@ -119,8 +119,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command, "^<\\S+: 0x[[:xdigit:]]{5,}>\\s*$"); if (GetDebugger().GetShowDontUsePoHint() && target_ptr && - (language == lldb::eLanguageTypeSwift || - language == lldb::eLanguageTypeObjC) && + (language.AsLanguageType() == lldb::eLanguageTypeSwift || + language.IsObjC()) && std::regex_match(output.data(), swift_class_regex)) { result.AppendNote( @@ -193,7 +193,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command, // Second, try `expr` as a persistent variable. if (expr.starts_with("$")) - if (auto *state = target.GetPersistentExpressionStateForLanguage(language)) + if (auto *state = target.GetPersistentExpressionStateForLanguage( + language.AsLanguageType())) if (auto var_sp = state->GetVariable(expr)) if (auto valobj_sp = var_sp->GetValueObject()) { dump_val_object(*valobj_sp); diff --git a/lldb/source/Expression/UserExpression.cpp b/lldb/source/Expression/UserExpression.cpp index af4b477660eeb..5563eba21777e 100644 --- a/lldb/source/Expression/UserExpression.cpp +++ b/lldb/source/Expression/UserExpression.cpp @@ -246,7 +246,7 @@ UserExpression::Evaluate(ExecutionContext &exe_ctx, // language in the target's properties if specified, else default to the // langage for the frame. if (!language) { - if (target->GetLanguage() != lldb::eLanguageTypeUnknown) + if (target->GetLanguage()) language = target->GetLanguage(); else if (StackFrame *frame = exe_ctx.GetFramePtr()) language = frame->GetLanguage(); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 990074566be7e..6bab880b4d521 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -1502,7 +1502,7 @@ lldb_private::Status ClangExpressionParser::DoPrepareForExecution( LLDB_LOGF(log, "%s - Current expression language is %s\n", __FUNCTION__, lang.GetDescription().data()); lldb::ProcessSP process_sp = exe_ctx.GetProcessSP(); - if (process_sp && lang != lldb::eLanguageTypeUnknown) { + if (process_sp && lang) { auto runtime = process_sp->GetLanguageRuntime(lang.AsLanguageType()); if (runtime) runtime->GetIRPasses(custom_passes); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp index 1c575e90bd72c..46cf9b8524ede 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp @@ -442,6 +442,10 @@ void UdtRecordCompleter::Record::ConstructRecord() { // The end offset to a vector of field/struct that ends at the offset. std::map> end_offset_map; + auto is_last_end_offset = [&](auto it) { + return it != end_offset_map.end() && ++it == end_offset_map.end(); + }; + for (auto &pair : fields_map) { uint64_t offset = pair.first; auto &fields = pair.second; @@ -462,8 +466,23 @@ void UdtRecordCompleter::Record::ConstructRecord() { } if (iter->second.empty()) continue; - parent = iter->second.back(); - iter->second.pop_back(); + + // If the new fields come after the already added ones + // without overlap, go back to the root. + if (iter->first <= offset && is_last_end_offset(iter)) { + if (record.kind == Member::Struct) { + parent = &record; + } else { + assert(record.kind == Member::Union && + "Current record must be a union"); + assert(!record.fields.empty()); + // For unions, append the field to the last struct + parent = record.fields.back().get(); + } + } else { + parent = iter->second.back(); + iter->second.pop_back(); + } } // If it's a field, then the field is inside a union, so we can safely // increase its size by converting it to a struct to hold multiple fields. diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp index 2ed58c5331df4..95b515412d693 100644 --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -1344,18 +1344,18 @@ const char *StackFrame::GetDisplayFunctionName() { SourceLanguage StackFrame::GetLanguage() { CompileUnit *cu = GetSymbolContext(eSymbolContextCompUnit).comp_unit; if (cu) - return cu->GetLanguage(); + return SourceLanguage{cu->GetLanguage()}; return {}; } SourceLanguage StackFrame::GuessLanguage() { SourceLanguage lang_type = GetLanguage(); - if (lang_type == eLanguageTypeUnknown) { + if (!lang_type) { SymbolContext sc = GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol); if (sc.function) - lang_type = LanguageType(sc.function->GetMangled().GuessLanguage()); + lang_type = SourceLanguage(sc.function->GetMangled().GuessLanguage()); else if (sc.symbol) lang_type = SourceLanguage(sc.symbol->GetMangled().GuessLanguage()); } diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index a23091ad09c6d..e53fc7a1e1bda 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -4945,7 +4945,7 @@ void TargetProperties::SetStandardErrorPath(llvm::StringRef path) { SourceLanguage TargetProperties::GetLanguage() const { const uint32_t idx = ePropertyLanguage; - return {GetPropertyAtIndexAs(idx, {})}; + return SourceLanguage{GetPropertyAtIndexAs(idx, {})}; } llvm::StringRef TargetProperties::GetExpressionPrefixContents() { diff --git a/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp b/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp index 36bfdb9a8e565..83ed533eb13e3 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/class_layout.cpp @@ -34,9 +34,6 @@ // CHECK-NEXT: s4 = { // CHECK-NEXT: x = ([0] = 67, [1] = 68, [2] = 99) // CHECK-NEXT: } -// CHECK-NEXT: s1 = { -// CHECK-NEXT: x = ([0] = 69, [1] = 70, [2] = 71) -// CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -47,6 +44,9 @@ // CHECK-NEXT: c2 = 'D' // CHECK-NEXT: } // CHECK-NEXT: } +// CHECK-NEXT: s1 = { +// CHECK-NEXT: x = ([0] = 69, [1] = 70, [2] = 71) +// CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: (lldb) type lookup C // CHECK-NEXT: struct C { @@ -63,7 +63,6 @@ // CHECK-NEXT: struct { // CHECK-NEXT: char c4; // CHECK-NEXT: S3 s4; -// CHECK-NEXT: S3 s1; // CHECK-NEXT: }; // CHECK-NEXT: }; // CHECK-NEXT: }; @@ -72,6 +71,7 @@ // CHECK-NEXT: char c2; // CHECK-NEXT: }; // CHECK-NEXT: }; +// CHECK-NEXT: S3 s1; // CHECK-NEXT: } diff --git a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp index 9d0d60fdaaed9..c8dce75af05eb 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp +++ b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp @@ -14,6 +14,12 @@ #include "DNBLog.h" #include #include +#include + +// From , but not on older OSs. +#ifndef VM_MEMORY_SANITIZER +#define VM_MEMORY_SANITIZER 99 +#endif MachVMRegion::MachVMRegion(task_t task) : m_task(task), m_addr(INVALID_NUB_ADDRESS), m_err(), diff --git a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts index 7060638a94864..433d48fab9d85 100644 --- a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts +++ b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts @@ -6,6 +6,7 @@ import * as fs from "node:fs/promises"; import { ConfigureButton, OpenSettingsButton } from "./ui/show-error-message"; import { ErrorWithNotification } from "./ui/error-with-notification"; import { LogFilePathProvider, LogType } from "./logging"; +import { expandUser } from "./utils"; const exec = util.promisify(child_process.execFile); @@ -116,8 +117,9 @@ async function getDAPExecutable( configuration: vscode.DebugConfiguration, ): Promise { // Check if the executable was provided in the launch configuration. - const launchConfigPath = configuration["debugAdapterExecutable"]; + let launchConfigPath = configuration["debugAdapterExecutable"]; if (typeof launchConfigPath === "string" && launchConfigPath.length !== 0) { + launchConfigPath = expandUser(launchConfigPath); if (!(await isExecutable(launchConfigPath))) { throw new ErrorWithNotification( `Debug adapter path "${launchConfigPath}" is not a valid file. The path comes from your launch configuration.`, @@ -129,7 +131,7 @@ async function getDAPExecutable( // Check if the executable was provided in the extension's configuration. const config = vscode.workspace.getConfiguration("lldb-dap", workspaceFolder); - const configPath = config.get("executable-path"); + const configPath = expandUser(config.get("executable-path") ?? ""); if (configPath && configPath.length !== 0) { if (!(await isExecutable(configPath))) { throw new ErrorWithNotification( diff --git a/lldb/tools/lldb-dap/src-ts/utils.ts b/lldb/tools/lldb-dap/src-ts/utils.ts new file mode 100644 index 0000000000000..efebe0b0f42ba --- /dev/null +++ b/lldb/tools/lldb-dap/src-ts/utils.ts @@ -0,0 +1,41 @@ +import * as os from "os"; +import * as path from "path"; + +/** + * Expands the character `~` to the user's home directory + */ +export function expandUser(file_path: string): string { + if (os.platform() == "win32") { + return file_path; + } + + if (!file_path) { + return ""; + } + + if (!file_path.startsWith("~")) { + return file_path; + } + + const path_len = file_path.length; + if (path_len == 1) { + return os.homedir(); + } + + if (file_path.charAt(1) == path.sep) { + return path.join(os.homedir(), file_path.substring(1)); + } + + const sep_index = file_path.indexOf(path.sep); + const user_name_end = sep_index == -1 ? file_path.length : sep_index; + const user_name = file_path.substring(1, user_name_end); + try { + if (user_name == os.userInfo().username) { + return path.join(os.homedir(), file_path.substring(user_name_end)); + } + } catch (err) { + return file_path; + } + + return file_path; +} diff --git a/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp b/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp index 17284b61b9a6e..cd6db5fcb1f4c 100644 --- a/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp +++ b/lldb/unittests/SymbolFile/NativePDB/UdtRecordCompleterTests.cpp @@ -99,7 +99,7 @@ Member *AddField(Member *member, StringRef name, uint64_t byte_offset, std::make_unique(name, byte_offset * 8, byte_size * 8, clang::QualType(), lldb::eAccessPublic, 0); field->kind = kind; - field->base_offset = base_offset; + field->base_offset = base_offset * 8; member->fields.push_back(std::move(field)); return member->fields.back().get(); } @@ -111,6 +111,9 @@ TEST_F(UdtRecordCompleterRecordTests, TestAnonymousUnionInStruct) { CollectMember("m2", 0, 4); CollectMember("m3", 0, 1); CollectMember("m4", 0, 8); + CollectMember("m5", 8, 8); + CollectMember("m6", 16, 4); + CollectMember("m7", 16, 8); ConstructRecord(); // struct { @@ -120,6 +123,11 @@ TEST_F(UdtRecordCompleterRecordTests, TestAnonymousUnionInStruct) { // m3; // m4; // }; + // m5; + // union { + // m6; + // m7; + // }; // }; Record record; record.start_offset = 0; @@ -128,6 +136,10 @@ TEST_F(UdtRecordCompleterRecordTests, TestAnonymousUnionInStruct) { AddField(u, "m2", 0, 4, Member::Field); AddField(u, "m3", 0, 1, Member::Field); AddField(u, "m4", 0, 8, Member::Field); + AddField(&record.record, "m5", 8, 8, Member::Field); + Member *u2 = AddField(&record.record, "", 16, 0, Member::Union); + AddField(u2, "m6", 16, 4, Member::Field); + AddField(u2, "m7", 16, 8, Member::Field); EXPECT_EQ(WrappedRecord(this->record), WrappedRecord(record)); } @@ -243,3 +255,41 @@ TEST_F(UdtRecordCompleterRecordTests, TestNestedUnionStructInUnion) { AddField(s2, "m4", 2, 4, Member::Field); EXPECT_EQ(WrappedRecord(this->record), WrappedRecord(record)); } + +TEST_F(UdtRecordCompleterRecordTests, TestNestedStructInUnionInStructInUnion) { + SetKind(Member::Kind::Union); + CollectMember("m1", 0, 4); + CollectMember("m2", 0, 2); + CollectMember("m3", 0, 2); + CollectMember("m4", 2, 4); + CollectMember("m5", 6, 2); + CollectMember("m6", 6, 2); + CollectMember("m7", 8, 2); + ConstructRecord(); + + // union { + // m1; + // m2; + // struct { + // m3; + // m4; + // union { + // m5; + // m6; + // }; + // m7; + // }; + // }; + Record record; + record.start_offset = 0; + AddField(&record.record, "m1", 0, 4, Member::Field); + AddField(&record.record, "m2", 0, 2, Member::Field); + Member *s = AddField(&record.record, "", 0, 0, Member::Struct); + AddField(s, "m3", 0, 2, Member::Field); + AddField(s, "m4", 2, 4, Member::Field); + Member *u = AddField(s, "", 6, 0, Member::Union); + AddField(u, "m5", 6, 2, Member::Field); + AddField(u, "m6", 6, 2, Member::Field); + AddField(s, "m7", 8, 2, Member::Field); + EXPECT_EQ(WrappedRecord(this->record), WrappedRecord(record)); +} diff --git a/llvm/docs/GoldPlugin.rst b/llvm/docs/GoldPlugin.rst index 07d2fc203eba5..606f9e0820e60 100644 --- a/llvm/docs/GoldPlugin.rst +++ b/llvm/docs/GoldPlugin.rst @@ -83,7 +83,7 @@ which is why you otherwise need gold to be the installed system linker in your path. ``ar`` and ``nm`` also accept the ``-plugin`` option and it's possible to -to install ``LLVMgold.so`` to ``/usr/lib/bfd-plugins`` for a seamless setup. +install ``LLVMgold.so`` to ``/usr/lib/bfd-plugins`` for a seamless setup. If you built your own gold, be sure to install the ``ar`` and ``nm-new`` you built to ``/usr/bin``. @@ -143,7 +143,7 @@ Quickstart for using LTO with autotooled projects ================================================= Once your system ``ld``, ``ar``, and ``nm`` all support LLVM bitcode, -everything is in place for an easy to use LTO build of autotooled projects: +everything is in place for an easy-to-use LTO build of autotooled projects: * Follow the instructions :ref:`on how to build LLVMgold.so `. diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h index 3511776d3e4c1..528d14d71a1d9 100644 --- a/llvm/include/llvm/ADT/STLForwardCompat.h +++ b/llvm/include/llvm/ADT/STLForwardCompat.h @@ -143,7 +143,10 @@ struct identity // NOLINT(readability-identifier-naming) /// The std::pointer_traits<>::to_address(p) variations of these overloads has /// not been implemented. template auto to_address(const Ptr &P) { return P.operator->(); } -template constexpr T *to_address(T *P) { return P; } +template constexpr T *to_address(T *P) { + static_assert(!std::is_function_v); + return P; +} //===----------------------------------------------------------------------===// // Features from C++23 diff --git a/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h b/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h index cb535ac14f1c6..a1b030c157eae 100644 --- a/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h +++ b/llvm/include/llvm/Analysis/DXILMetadataAnalysis.h @@ -27,6 +27,9 @@ struct EntryProperties { unsigned NumThreadsX{0}; // X component unsigned NumThreadsY{0}; // Y component unsigned NumThreadsZ{0}; // Z component + unsigned WaveSizeMin{0}; // Minimum component + unsigned WaveSizeMax{0}; // Maximum component + unsigned WaveSizePref{0}; // Preferred component EntryProperties(const Function *Fn = nullptr) : Entry(Fn) {}; }; diff --git a/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h new file mode 100644 index 0000000000000..e8eceeed6aca6 --- /dev/null +++ b/llvm/include/llvm/CodeGen/LibcallLoweringInfo.h @@ -0,0 +1,66 @@ +//===- LibcallLoweringInfo.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/RuntimeLibcalls.h" + +namespace llvm { + +class LibcallLoweringInfo { +private: + LLVM_ABI const RTLIB::RuntimeLibcallsInfo &RTLCI; + /// Stores the implementation choice for each each libcall. + LLVM_ABI RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { + RTLIB::Unsupported}; + +public: + LLVM_ABI LibcallLoweringInfo(const RTLIB::RuntimeLibcallsInfo &RTLCI); + + /// Get the libcall routine name for the specified libcall. + // FIXME: This should be removed. Only LibcallImpl should have a name. + LLVM_ABI const char *getLibcallName(RTLIB::Libcall Call) const { + // FIXME: Return StringRef + return RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpls[Call]) + .data(); + } + + /// Return the lowering's selection of implementation call for \p Call + LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const { + return LibcallImpls[Call]; + } + + /// Rename the default libcall routine name for the specified libcall. + LLVM_ABI void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl) { + LibcallImpls[Call] = Impl; + } + + // FIXME: Remove this wrapper in favor of directly using + // getLibcallImplCallingConv + LLVM_ABI CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { + return RTLCI.LibcallImplCallingConvs[LibcallImpls[Call]]; + } + + /// Get the CallingConv that should be used for the specified libcall. + LLVM_ABI CallingConv::ID + getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const { + return RTLCI.LibcallImplCallingConvs[Call]; + } + + /// Return a function impl compatible with RTLIB::MEMCPY, or + /// RTLIB::Unsupported if fully unsupported. + RTLIB::LibcallImpl getMemcpyImpl() const { + RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY); + if (Memcpy == RTLIB::Unsupported) { + // Fallback to memmove if memcpy isn't available. + return getLibcallImpl(RTLIB::MEMMOVE); + } + + return Memcpy; + } +}; + +} // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index b229659415d55..8aeaa9cdacfc1 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -29,6 +29,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/LibcallLoweringInfo.h" #include "llvm/CodeGen/LowLevelTypeUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcallUtil.h" @@ -3232,6 +3233,11 @@ class LLVM_ABI TargetLoweringBase { /// Default to be the minimum interleave factor: 2. virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } + /// Return true if the target interleave with shuffles are cheaper + virtual bool isProfitableToInterleaveWithGatherScatter() const { + return false; + } + /// Lower an interleaved load to target specific intrinsics. Return /// true on success. /// @@ -3597,7 +3603,7 @@ class LLVM_ABI TargetLoweringBase { } const RTLIB::RuntimeLibcallsInfo &getRuntimeLibcallsInfo() const { - return Libcalls; + return RuntimeLibcallInfo; } void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl) { @@ -3610,9 +3616,9 @@ class LLVM_ABI TargetLoweringBase { } /// Get the libcall routine name for the specified libcall. + // FIXME: This should be removed. Only LibcallImpl should have a name. const char *getLibcallName(RTLIB::Libcall Call) const { - // FIXME: Return StringRef - return Libcalls.getLibcallName(Call).data(); + return Libcalls.getLibcallName(Call); } /// Get the libcall routine name for the specified libcall implementation @@ -3625,7 +3631,7 @@ class LLVM_ABI TargetLoweringBase { /// Check if this is valid libcall for the current module, otherwise /// RTLIB::Unsupported. RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const { - return Libcalls.getSupportedLibcallImpl(FuncName); + return RuntimeLibcallInfo.getSupportedLibcallImpl(FuncName); } /// Get the comparison predicate that's to be used to test the result of the @@ -3633,11 +3639,6 @@ class LLVM_ABI TargetLoweringBase { /// floating-point compare libcalls. ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const; - /// Set the CallingConv that should be used for the specified libcall. - void setLibcallImplCallingConv(RTLIB::LibcallImpl Call, CallingConv::ID CC) { - Libcalls.setLibcallImplCallingConv(Call, CC); - } - /// Get the CallingConv that should be used for the specified libcall /// implementation. CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const { @@ -3834,8 +3835,11 @@ class LLVM_ABI TargetLoweringBase { std::map, MVT::SimpleValueType> PromoteToType; + /// FIXME: This should not live here; it should come from an analysis. + const RTLIB::RuntimeLibcallsInfo RuntimeLibcallInfo; + /// The list of libcalls that the target will use. - RTLIB::RuntimeLibcallsInfo Libcalls; + LibcallLoweringInfo Libcalls; /// The bits of IndexedModeActions used to store the legalisation actions /// We store the data as | ML | MS | L | S | each taking 4 bits. diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index 155cfe8dd3a98..711aa70a4a8d3 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -708,7 +708,7 @@ struct DEMANGLE_ABI SpecialTableSymbolNode : public SymbolNode { return N->kind() == NodeKind::SpecialTableSymbol; } - QualifiedNameNode *TargetName = nullptr; + NodeArrayNode *TargetNames = nullptr; Qualifiers Quals = Qualifiers::Q_None; }; diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index a0876b169e0b8..a7bcbf010d1bf 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -194,10 +194,11 @@ LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, /// Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch /// weights in the new instruction if the parent function of the original /// instruction has an entry count. This is to not confuse users by injecting -/// profile data into non-profiled functions. -LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, - Function &F, - StringRef PassName); +/// profile data into non-profiled functions. If \p F is nullptr, we will fetch +/// the function from \p I. +LLVM_ABI void +setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, + const Function *F = nullptr); /// Analogous to setExplicitlyUnknownBranchWeights, but for functions and their /// entry counts. diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index bae760b3f981d..78e4b1723aafa 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -42,6 +42,8 @@ template <> struct enum_iteration_traits { static constexpr bool is_iterable = true; }; +class LibcallLoweringInfo; + namespace RTLIB { // Return an iterator over all Libcall values. @@ -70,6 +72,8 @@ struct RuntimeLibcallsInfo { LibcallImplBitset AvailableLibcallImpls; public: + friend class llvm::LibcallLoweringInfo; + explicit RuntimeLibcallsInfo( const Triple &TT, ExceptionHandling ExceptionModel = ExceptionHandling::None, @@ -85,17 +89,6 @@ struct RuntimeLibcallsInfo { initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName); } - /// Rename the default libcall routine name for the specified libcall. - void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl) { - LibcallImpls[Call] = Impl; - } - - /// Get the libcall routine name for the specified libcall. - // FIXME: This should be removed. Only LibcallImpl should have a name. - StringRef getLibcallName(RTLIB::Libcall Call) const { - return getLibcallImplName(LibcallImpls[Call]); - } - /// Get the libcall routine name for the specified libcall implementation. static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl) { if (CallImpl == RTLIB::Unsupported) @@ -105,42 +98,24 @@ struct RuntimeLibcallsInfo { RuntimeLibcallNameSizeTable[CallImpl]); } - /// Return the lowering's selection of implementation call for \p Call - RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const { - return LibcallImpls[Call]; - } - /// Set the CallingConv that should be used for the specified libcall /// implementation void setLibcallImplCallingConv(RTLIB::LibcallImpl Call, CallingConv::ID CC) { LibcallImplCallingConvs[Call] = CC; } - // FIXME: Remove this wrapper in favor of directly using - // getLibcallImplCallingConv - CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { - return LibcallImplCallingConvs[LibcallImpls[Call]]; - } - /// Get the CallingConv that should be used for the specified libcall. CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const { return LibcallImplCallingConvs[Call]; } - ArrayRef getLibcallImpls() const { - // Trim UNKNOWN_LIBCALL from the back - return ArrayRef(LibcallImpls).drop_back(); + /// Return the libcall provided by \p Impl + static RTLIB::Libcall getLibcallFromImpl(RTLIB::LibcallImpl Impl) { + return ImplToLibcall[Impl]; } - /// Return a function name compatible with RTLIB::MEMCPY, or nullptr if fully - /// unsupported. - RTLIB::LibcallImpl getMemcpyImpl() const { - RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY); - if (Memcpy != RTLIB::Unsupported) - return Memcpy; - - // Fallback to memmove if memcpy isn't available. - return getLibcallImpl(RTLIB::MEMMOVE); + unsigned getNumAvailableLibcallImpls() const { + return AvailableLibcallImpls.count(); } bool isAvailable(RTLIB::LibcallImpl Impl) const { @@ -151,11 +126,6 @@ struct RuntimeLibcallsInfo { AvailableLibcallImpls.set(Impl); } - /// Return the libcall provided by \p Impl - static RTLIB::Libcall getLibcallFromImpl(RTLIB::LibcallImpl Impl) { - return ImplToLibcall[Impl]; - } - /// Check if a function name is a recognized runtime call of any kind. This /// does not consider if this call is available for any current compilation, /// just that it is a known call somewhere. This returns the set of all @@ -176,11 +146,8 @@ struct RuntimeLibcallsInfo { LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const { for (RTLIB::LibcallImpl Impl : lookupLibcallImplName(FuncName)) { - // FIXME: This should not depend on looking up ImplToLibcall, only the - // list of libcalls for the module. - RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; - if (Recognized != RTLIB::Unsupported) - return Recognized; + if (isAvailable(Impl)) + return Impl; } return RTLIB::Unsupported; @@ -197,10 +164,6 @@ struct RuntimeLibcallsInfo { LLVM_ABI static iota_range lookupLibcallImplNameImpl(StringRef Name); - /// Stores the implementation choice for each each libcall. - RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { - RTLIB::Unsupported}; - static_assert(static_cast(CallingConv::C) == 0, "default calling conv should be encoded as 0"); @@ -274,6 +237,7 @@ struct RuntimeLibcallsInfo { }; } // namespace RTLIB + } // namespace llvm #endif // LLVM_IR_RUNTIME_LIBCALLS_H diff --git a/llvm/include/llvm/IR/RuntimeLibcallsImpl.td b/llvm/include/llvm/IR/RuntimeLibcallsImpl.td index b5752c1b69ad8..92853125379f5 100644 --- a/llvm/include/llvm/IR/RuntimeLibcallsImpl.td +++ b/llvm/include/llvm/IR/RuntimeLibcallsImpl.td @@ -61,7 +61,6 @@ class RuntimeLibcall { class RuntimeLibcallImpl { RuntimeLibcall Provides = P; string LibCallFuncName = Name; - list LoweringPredicates; bit IsDefault = false; } diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 9bbb8a2a30541..0a253efc2abcb 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -225,7 +225,7 @@ inline constexpr int64_t minIntN(int64_t N) { if (N == 0) return 0; - return UINT64_C(1) + ~(UINT64_C(1) << (N - 1)); + return UINT64_MAX << (N - 1); } /// Gets the maximum value for a N-bit signed integer. @@ -241,7 +241,7 @@ inline constexpr int64_t maxIntN(int64_t N) { /// Checks if an unsigned integer fits into the given (dynamic) bit width. inline constexpr bool isUIntN(unsigned N, uint64_t x) { - return N >= 64 || x <= maxUIntN(N); + return N >= 64 || (x >> N) == 0; } /// Checks if an signed integer fits into the given (dynamic) bit width. diff --git a/llvm/lib/Analysis/DXILMetadataAnalysis.cpp b/llvm/lib/Analysis/DXILMetadataAnalysis.cpp index 23f1aa82ae8a3..bd77cba385667 100644 --- a/llvm/lib/Analysis/DXILMetadataAnalysis.cpp +++ b/llvm/lib/Analysis/DXILMetadataAnalysis.cpp @@ -66,6 +66,22 @@ static ModuleMetadataInfo collectMetadataInfo(Module &M) { Success = llvm::to_integer(NumThreadsVec[2], EFP.NumThreadsZ, 10); assert(Success && "Failed to parse Z component of numthreads"); } + // Get wavesize attribute value, if one exists + StringRef WaveSizeStr = + F.getFnAttribute("hlsl.wavesize").getValueAsString(); + if (!WaveSizeStr.empty()) { + SmallVector WaveSizeVec; + WaveSizeStr.split(WaveSizeVec, ','); + assert(WaveSizeVec.size() == 3 && "Invalid wavesize specified"); + // Read in the three component values of numthreads + [[maybe_unused]] bool Success = + llvm::to_integer(WaveSizeVec[0], EFP.WaveSizeMin, 10); + assert(Success && "Failed to parse Min component of wavesize"); + Success = llvm::to_integer(WaveSizeVec[1], EFP.WaveSizeMax, 10); + assert(Success && "Failed to parse Max component of wavesize"); + Success = llvm::to_integer(WaveSizeVec[2], EFP.WaveSizePref, 10); + assert(Success && "Failed to parse Preferred component of wavesize"); + } MMDAI.EntryPropertyVec.push_back(EFP); } return MMDAI; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f0ec9bf885c12..3f5387738c328 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1755,7 +1755,6 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, OutStreamer->pushSection(); OutStreamer->switchSection(FuncCGSection); - const MCSymbol *FunctionSymbol = getFunctionBegin(); const Function &F = MF.getFunction(); // If this function has external linkage or has its address taken and // it is not a callback, then anything could call it. @@ -1794,7 +1793,7 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, // 8) Each unique indirect target type id. OutStreamer->emitInt8(CallGraphSectionFormatVersion::V_0); OutStreamer->emitInt8(static_cast(CGFlags)); - OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + OutStreamer->emitSymbolValue(getSymbol(&F), TM.getProgramPointerSize()); const auto *TypeId = extractNumericCGTypeId(F); if (IsIndirectTarget && TypeId) OutStreamer->emitInt64(TypeId->getZExtValue()); diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 6412949948c07..d9bc042d6807e 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1301,7 +1301,7 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop( // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is // hard to predict precise branch weigths we mark the branch as "unknown" // (50/50) to prevent misleading optimizations. - setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return Loaded; @@ -1686,7 +1686,12 @@ Value *AtomicExpandImpl::insertRMWCmpXchgLoop( Loaded->addIncoming(NewLoaded, LoopBB); - Builder.CreateCondBr(Success, ExitBB, LoopBB); + Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB); + + // Atomic RMW expands to a cmpxchg loop, Since precise branch weights + // cannot be easily determined here, we mark the branch as "unknown" (50/50) + // to prevent misleading optimizations. + setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return NewLoaded; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 4373c5397a3c6..1cf0b4964760b 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -88,6 +88,7 @@ add_llvm_component_library(LLVMCodeGen LatencyPriorityQueue.cpp LazyMachineBlockFrequencyInfo.cpp LexicalScopes.cpp + LibcallLoweringInfo.cpp LiveDebugVariables.cpp LiveIntervals.cpp LiveInterval.cpp diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 5c27a20869f81..45eca28ffb8a2 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -239,7 +239,8 @@ static bool isDeInterleaveMask(ArrayRef Mask, unsigned &Factor, /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...> /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, - unsigned MaxFactor) { + unsigned MaxFactor, + bool InterleaveWithShuffles) { unsigned NumElts = SVI->getShuffleMask().size(); if (NumElts < 4) return false; @@ -250,6 +251,13 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, return true; } + if (InterleaveWithShuffles) { + for (unsigned i = 1; MaxFactor * i <= 16; i *= 2) { + Factor = i * MaxFactor; + if (SVI->isInterleave(Factor)) + return true; + } + } return false; } @@ -528,7 +536,8 @@ bool InterleavedAccessImpl::lowerInterleavedStore( cast(SVI->getType())->getNumElements(); // Check if the shufflevector is RE-interleave shuffle. unsigned Factor; - if (!isReInterleaveMask(SVI, Factor, MaxFactor)) + if (!isReInterleaveMask(SVI, Factor, MaxFactor, + TLI->isProfitableToInterleaveWithGatherScatter())) return false; assert(NumStoredElements % Factor == 0 && "number of stored element should be a multiple of Factor"); diff --git a/llvm/lib/CodeGen/LibcallLoweringInfo.cpp b/llvm/lib/CodeGen/LibcallLoweringInfo.cpp new file mode 100644 index 0000000000000..5c1698cb6060e --- /dev/null +++ b/llvm/lib/CodeGen/LibcallLoweringInfo.cpp @@ -0,0 +1,26 @@ +//===- LibcallLoweringInfo.cpp - Interface for runtime libcalls -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LibcallLoweringInfo.h" + +using namespace llvm; + +LibcallLoweringInfo::LibcallLoweringInfo( + const RTLIB::RuntimeLibcallsInfo &RTLCI) + : RTLCI(RTLCI) { + // TODO: This should be generated with lowering predicates, and assert the + // call is available. + for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) { + if (RTLCI.isAvailable(Impl)) { + RTLIB::Libcall LC = RTLIB::RuntimeLibcallsInfo::getLibcallFromImpl(Impl); + // FIXME: Hack, assume the first available libcall wins. + if (LibcallImpls[LC] == RTLIB::Unsupported) + LibcallImpls[LC] = Impl; + } + } +} diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index b3535eaca5e9d..1cc591c17f9c3 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -697,9 +697,11 @@ ISD::CondCode TargetLoweringBase::getSoftFloatCmpLibcallPredicate( /// NOTE: The TargetMachine owns TLOF. TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) - : TM(tm), Libcalls(TM.getTargetTriple(), TM.Options.ExceptionModel, - TM.Options.FloatABIType, TM.Options.EABIVersion, - TM.Options.MCOptions.getABIName()) { + : TM(tm), + RuntimeLibcallInfo(TM.getTargetTriple(), TM.Options.ExceptionModel, + TM.Options.FloatABIType, TM.Options.EABIVersion, + TM.Options.MCOptions.getABIName()), + Libcalls(RuntimeLibcallInfo) { initActions(); // Perform these initializations only once. diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index b22928be3be50..0aefe6e077c24 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -277,6 +277,18 @@ demanglePointerCVQualifiers(std::string_view &MangledName) { DEMANGLE_UNREACHABLE; } +static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, + size_t Count) { + NodeArrayNode *N = Arena.alloc(); + N->Count = Count; + N->Nodes = Arena.allocArray(Count); + for (size_t I = 0; I < Count; ++I) { + N->Nodes[I] = Head->N; + Head = Head->Next; + } + return N; +} + std::string_view Demangler::copyString(std::string_view Borrowed) { char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); // This is not a micro-optimization, it avoids UB, should Borrowed be an null @@ -323,8 +335,30 @@ Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName, } std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); - if (!consumeFront(MangledName, '@')) - STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); + + NodeList *TargetCurrent = nullptr; + NodeList *TargetHead = nullptr; + size_t Count = 0; + while (!consumeFront(MangledName, '@')) { + ++Count; + + NodeList *Next = Arena.alloc(); + if (TargetCurrent) + TargetCurrent->Next = Next; + else + TargetHead = Next; + + TargetCurrent = Next; + QualifiedNameNode *QN = demangleFullyQualifiedTypeName(MangledName); + if (Error) + return nullptr; + assert(QN); + TargetCurrent->N = QN; + } + + if (Count > 0) + STSN->TargetNames = nodeListToNodeArray(Arena, TargetHead, Count); + return STSN; } @@ -1605,18 +1639,6 @@ Demangler::demangleNameScopePiece(std::string_view &MangledName) { return demangleSimpleName(MangledName, /*Memorize=*/true); } -static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, - size_t Count) { - NodeArrayNode *N = Arena.alloc(); - N->Count = Count; - N->Nodes = Arena.allocArray(Count); - for (size_t I = 0; I < Count; ++I) { - N->Nodes[I] = Head->N; - Head = Head->Next; - } - return N; -} - QualifiedNameNode * Demangler::demangleNameScopeChain(std::string_view &MangledName, IdentifierNode *UnqualifiedName) { diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp index 61e4961c714bc..17c6aab500049 100644 --- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -662,9 +662,9 @@ void VcallThunkIdentifierNode::output(OutputBuffer &OB, void SpecialTableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const { outputQualifiers(OB, Quals, false, true); Name->output(OB, Flags); - if (TargetName) { + if (TargetNames) { OB << "{for `"; - TargetName->output(OB, Flags); + TargetNames->output(OB, Flags, "'s `"); OB << "'}"; } } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 46cf60be1bafd..98f10a5a60f24 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -758,14 +758,12 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { AbstractSlotTrackerStorage::~AbstractSlotTrackerStorage() = default; -namespace llvm { - //===----------------------------------------------------------------------===// // SlotTracker Class: Enumerate slot numbers for unnamed values //===----------------------------------------------------------------------===// /// This class provides computation of slot numbers for LLVM Assembly writing. /// -class SlotTracker : public AbstractSlotTrackerStorage { +class llvm::SlotTracker : public AbstractSlotTrackerStorage { public: /// ValueMap - A mapping of Values to slot numbers. using ValueMap = DenseMap; @@ -943,8 +941,6 @@ class SlotTracker : public AbstractSlotTrackerStorage { void processDbgRecordMetadata(const DbgRecord &DVR); }; -} // end namespace llvm - ModuleSlotTracker::ModuleSlotTracker(SlotTracker &Machine, const Module *M, const Function *F) : M(M), F(F), Machine(&Machine) {} diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index 01dafcab94ce9..bfba6e0cab6bf 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -10,10 +10,11 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfo.h" +using namespace llvm; + #if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN #include "llvm/Support/Signals.h" -namespace llvm { DbgLocOrigin::DbgLocOrigin(bool ShouldCollectTrace) { if (!ShouldCollectTrace) return; @@ -30,11 +31,8 @@ void DbgLocOrigin::addTrace() { auto &[Depth, StackTrace] = StackTraces.emplace_back(); Depth = sys::getStackTrace(StackTrace); } -} // namespace llvm #endif -using namespace llvm; - #if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) : TrackingMDNodeRef(const_cast(L)), DbgLocOrigin(!L), diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp index 3fc3d28ba34fd..926a009b7831f 100644 --- a/llvm/lib/IR/DebugProgramInstruction.cpp +++ b/llvm/lib/IR/DebugProgramInstruction.cpp @@ -12,8 +12,9 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Compiler.h" -namespace llvm { +using namespace llvm; +namespace llvm { template DbgRecordParamRef::DbgRecordParamRef(const T *Param) : Ref(const_cast(Param)) {} @@ -28,6 +29,7 @@ template T *DbgRecordParamRef::get() const { template class LLVM_EXPORT_TEMPLATE DbgRecordParamRef; template class LLVM_EXPORT_TEMPLATE DbgRecordParamRef; template class LLVM_EXPORT_TEMPLATE DbgRecordParamRef; +} // namespace llvm DbgVariableRecord::DbgVariableRecord(const DbgVariableIntrinsic *DVI) : DbgRecord(ValueKind, DVI->getDebugLoc()), @@ -756,5 +758,3 @@ iterator_range::iterator> DbgMarker::cloneDebugInfoFrom( // We inserted a block at the end, return that range. return {First->getIterator(), StoredDbgRecords.end()}; } - -} // end namespace llvm diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp index 67f21d3756e93..c41d7b3181a37 100644 --- a/llvm/lib/IR/FPEnv.cpp +++ b/llvm/lib/IR/FPEnv.cpp @@ -19,9 +19,10 @@ #include "llvm/IR/Intrinsics.h" #include -namespace llvm { +using namespace llvm; -std::optional convertStrToRoundingMode(StringRef RoundingArg) { +std::optional +llvm::convertStrToRoundingMode(StringRef RoundingArg) { // For dynamic rounding mode, we use round to nearest but we will set the // 'exact' SDNodeFlag so that the value will not be rounded. return StringSwitch>(RoundingArg) @@ -34,7 +35,8 @@ std::optional convertStrToRoundingMode(StringRef RoundingArg) { .Default(std::nullopt); } -std::optional convertRoundingModeToStr(RoundingMode UseRounding) { +std::optional +llvm::convertRoundingModeToStr(RoundingMode UseRounding) { std::optional RoundingStr; switch (UseRounding) { case RoundingMode::Dynamic: @@ -62,7 +64,7 @@ std::optional convertRoundingModeToStr(RoundingMode UseRounding) { } std::optional -convertStrToExceptionBehavior(StringRef ExceptionArg) { +llvm::convertStrToExceptionBehavior(StringRef ExceptionArg) { return StringSwitch>(ExceptionArg) .Case("fpexcept.ignore", fp::ebIgnore) .Case("fpexcept.maytrap", fp::ebMayTrap) @@ -71,7 +73,7 @@ convertStrToExceptionBehavior(StringRef ExceptionArg) { } std::optional -convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { +llvm::convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { std::optional ExceptStr; switch (UseExcept) { case fp::ebStrict: @@ -87,7 +89,7 @@ convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { return ExceptStr; } -Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr) { +Intrinsic::ID llvm::getConstrainedIntrinsicID(const Instruction &Instr) { Intrinsic::ID IID = Intrinsic::not_intrinsic; switch (Instr.getOpcode()) { case Instruction::FCmp: @@ -127,5 +129,3 @@ Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr) { return IID; } - -} // namespace llvm diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 88dbd176e0d3f..95edb2e8e56d8 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1019,8 +1019,7 @@ Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True, const Twine &Name) { Value *Ret = CreateSelectFMF(C, True, False, {}, Name); if (auto *SI = dyn_cast(Ret)) { - setExplicitlyUnknownBranchWeightsIfProfiled( - *SI, *SI->getParent()->getParent(), PassName); + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, PassName); } return Ret; } diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 39e5463cb6fc3..c3e54a0fc0c7e 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -17,7 +17,8 @@ #include "ConstantsContext.h" -namespace llvm { +using namespace llvm; + bool Operator::hasPoisonGeneratingFlags() const { switch (getOpcode()) { case Instruction::Add: @@ -288,4 +289,3 @@ void FastMathFlags::print(raw_ostream &O) const { O << " afn"; } } -} // namespace llvm diff --git a/llvm/lib/IR/PassTimingInfo.cpp b/llvm/lib/IR/PassTimingInfo.cpp index 4e27086e97ac5..cb1b91a98b036 100644 --- a/llvm/lib/IR/PassTimingInfo.cpp +++ b/llvm/lib/IR/PassTimingInfo.cpp @@ -32,10 +32,10 @@ using namespace llvm; #define DEBUG_TYPE "time-passes" -namespace llvm { +using namespace llvm; -bool TimePassesIsEnabled = false; -bool TimePassesPerRun = false; +bool llvm::TimePassesIsEnabled = false; +bool llvm::TimePassesPerRun = false; static cl::opt EnableTiming( "time-passes", cl::location(TimePassesIsEnabled), cl::Hidden, @@ -139,7 +139,7 @@ PassTimingInfo *PassTimingInfo::TheTimeInfo; } // namespace legacy } // namespace -Timer *getPassTimer(Pass *P) { +Timer *llvm::getPassTimer(Pass *P) { legacy::PassTimingInfo::init(); if (legacy::PassTimingInfo::TheTimeInfo) return legacy::PassTimingInfo::TheTimeInfo->getPassTimer(P, P); @@ -148,7 +148,7 @@ Timer *getPassTimer(Pass *P) { /// If timing is enabled, report the times collected up to now and then reset /// them. -void reportAndResetTimings(raw_ostream *OutStream) { +void llvm::reportAndResetTimings(raw_ostream *OutStream) { if (legacy::PassTimingInfo::TheTimeInfo) legacy::PassTimingInfo::TheTimeInfo->print(OutStream); } @@ -315,5 +315,3 @@ void TimePassesHandler::registerCallbacks(PassInstrumentationCallbacks &PIC) { PIC.registerAfterAnalysisCallback( [this](StringRef P, Any) { this->stopAnalysisTimer(P); }); } - -} // namespace llvm diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index fc2be5188f456..94dbe1f3988b8 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -274,9 +274,12 @@ void llvm::setExplicitlyUnknownBranchWeights(Instruction &I, } void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, - Function &F, - StringRef PassName) { - if (std::optional EC = F.getEntryCount(); + StringRef PassName, + const Function *F) { + F = F ? F : I.getFunction(); + assert(F && "Either pass a instruction attached to a Function, or explicitly " + "pass the Function that it will be attached to"); + if (std::optional EC = F->getEntryCount(); EC && EC->getCount() > 0) setExplicitlyUnknownBranchWeights(I, PassName); } diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp index 59f218cc3683b..3c05f4b1f86a2 100644 --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -19,9 +19,7 @@ using namespace llvm; -namespace llvm { - -std::optional +static std::optional extractProbeFromDiscriminator(const DILocation *DIL) { if (DIL) { auto Discriminator = DIL->getDiscriminator(); @@ -43,7 +41,7 @@ extractProbeFromDiscriminator(const DILocation *DIL) { return std::nullopt; } -std::optional +static std::optional extractProbeFromDiscriminator(const Instruction &Inst) { assert(isa(&Inst) && !isa(&Inst) && "Only call instructions should have pseudo probe encodes as their " @@ -53,7 +51,7 @@ extractProbeFromDiscriminator(const Instruction &Inst) { return std::nullopt; } -std::optional extractProbe(const Instruction &Inst) { +std::optional llvm::extractProbe(const Instruction &Inst) { if (const auto *II = dyn_cast(&Inst)) { PseudoProbe Probe; Probe.Id = II->getIndex()->getZExtValue(); @@ -73,7 +71,7 @@ std::optional extractProbe(const Instruction &Inst) { return std::nullopt; } -void setProbeDistributionFactor(Instruction &Inst, float Factor) { +void llvm::setProbeDistributionFactor(Instruction &Inst, float Factor) { assert(Factor >= 0 && Factor <= 1 && "Distribution factor must be in [0, 1.0]"); if (auto *II = dyn_cast(&Inst)) { @@ -111,5 +109,3 @@ void setProbeDistributionFactor(Instruction &Inst, float Factor) { } } } - -} // namespace llvm diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp index 962368f061851..b3586b45a23f2 100644 --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -16,7 +16,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" -namespace llvm { +using namespace llvm; static bool isExpandableUser(User *U) { return isa(U) || isa(U); @@ -49,10 +49,10 @@ static SmallVector expandUser(BasicBlock::iterator InsertPt, return NewInsts; } -bool convertUsersOfConstantsToInstructions(ArrayRef Consts, - Function *RestrictToFunc, - bool RemoveDeadConstants, - bool IncludeSelf) { +bool llvm::convertUsersOfConstantsToInstructions(ArrayRef Consts, + Function *RestrictToFunc, + bool RemoveDeadConstants, + bool IncludeSelf) { // Find all expandable direct users of Consts. SmallVector Stack; for (Constant *C : Consts) { @@ -121,5 +121,3 @@ bool convertUsersOfConstantsToInstructions(ArrayRef Consts, return Changed; } - -} // namespace llvm diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 2ce5719228a0d..2fb01a4f95fea 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -19,6 +19,7 @@ using namespace llvm; using namespace RTLIB; +#define GET_RUNTIME_LIBCALLS_INFO #define GET_INIT_RUNTIME_LIBCALL_NAMES #define GET_SET_TARGET_RUNTIME_LIBCALL_SETS #define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp index 67882ba0144b4..504233575594d 100644 --- a/llvm/lib/IR/Use.cpp +++ b/llvm/lib/IR/Use.cpp @@ -9,7 +9,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" -namespace llvm { +using namespace llvm; void Use::swap(Use &RHS) { if (Val == RHS.Val) @@ -42,5 +42,3 @@ void Use::zap(Use *Start, const Use *Stop, bool del) { if (del) ::operator delete(Start); } - -} // namespace llvm diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp index ab44cb4b8a3f7..9bb7c1298593a 100644 --- a/llvm/lib/IR/User.cpp +++ b/llvm/lib/IR/User.cpp @@ -11,8 +11,11 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IntrinsicInst.h" +using namespace llvm; + namespace llvm { class BasicBlock; +} //===----------------------------------------------------------------------===// // User Class @@ -214,5 +217,3 @@ LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE void User::operator delete(void *Usr) { ::operator delete(Storage); } } - -} // namespace llvm diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 6d23dad2e185b..9b888927e4e20 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -136,9 +136,7 @@ static cl::opt VerifyNoAliasScopeDomination( cl::desc("Ensure that llvm.experimental.noalias.scope.decl for identical " "scopes are not dominating")); -namespace llvm { - -struct VerifierSupport { +struct llvm::VerifierSupport { raw_ostream *OS; const Module &M; ModuleSlotTracker MST; @@ -318,8 +316,6 @@ struct VerifierSupport { } }; -} // namespace llvm - namespace { class Verifier : public InstVisitor, VerifierSupport { diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 23be42f9d60ce..fefc733fa7697 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1396,11 +1396,10 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { RTLIB::RuntimeLibcallsInfo Libcalls(TT); SmallVector LibcallSymbols; - ArrayRef LibcallImpls = Libcalls.getLibcallImpls(); - LibcallSymbols.reserve(LibcallImpls.size()); + LibcallSymbols.reserve(Libcalls.getNumAvailableLibcallImpls()); - for (RTLIB::LibcallImpl Impl : LibcallImpls) { - if (Impl != RTLIB::Unsupported) + for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) { + if (Libcalls.isAvailable(Impl)) LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl).data()); } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index e5e5fc20728e8..29f291614ffc6 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -37,8 +37,6 @@ unsigned Object::getMachine() const { return *Header.Machine; return llvm::ELF::EM_NONE; } - -constexpr StringRef SectionHeaderTable::TypeStr; } // namespace ELFYAML namespace yaml { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d08f9b94227a2..298746863d221 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -96,6 +96,7 @@ #include #include #include +#include #include #include #include @@ -17989,11 +17990,17 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, unsigned Factor, const APInt &GapMask) const { - assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && - "Invalid interleave factor"); auto *SI = dyn_cast(Store); if (!SI) return false; + + if (isProfitableToInterleaveWithGatherScatter() && + Factor > getMaxSupportedInterleaveFactor()) + return lowerInterleavedStoreWithShuffle(SI, SVI, Factor); + + assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && + "Invalid interleave factor"); + assert(!LaneMask && GapMask.popcount() == Factor && "Unexpected mask on store"); @@ -18139,6 +18146,126 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, return true; } +/// If the interleaved vector elements are greater than supported MaxFactor, +/// interleaving the data with additional shuffles can be used to +/// achieve the same. +/// +/// Consider the following data with 8 interleaves which are shuffled to store +/// stN instructions. Data needs to be stored in this order: +/// [v0, v1, v2, v3, v4, v5, v6, v7] +/// +/// v0 v4 v2 v6 v1 v5 v3 v7 +/// | | | | | | | | +/// \ / \ / \ / \ / +/// [zip v0,v4] [zip v2,v6] [zip v1,v5] [zip v3,v7] ==> stN = 4 +/// | | | | +/// \ / \ / +/// \ / \ / +/// \ / \ / +/// [zip [v0,v2,v4,v6]] [zip [v1,v3,v5,v7]] ==> stN = 2 +/// +/// For stN = 4, upper half of interleaved data V0, V1, V2, V3 is stored +/// with one st4 instruction. Lower half, i.e, V4, V5, V6, V7 is stored with +/// another st4. +/// +/// For stN = 2, upper half of interleaved data V0, V1 is stored +/// with one st2 instruction. Second set V2, V3 is stored with another st2. +/// Total of 4 st2's are required here. +bool AArch64TargetLowering::lowerInterleavedStoreWithShuffle( + StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const { + unsigned MaxSupportedFactor = getMaxSupportedInterleaveFactor(); + + auto *VecTy = cast(SVI->getType()); + assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store"); + + unsigned LaneLen = VecTy->getNumElements() / Factor; + Type *EltTy = VecTy->getElementType(); + auto *SubVecTy = FixedVectorType::get(EltTy, Factor); + + const DataLayout &DL = SI->getModule()->getDataLayout(); + bool UseScalable; + + // Skip if we do not have NEON and skip illegal vector types. We can + // "legalize" wide vector types into multiple interleaved accesses as long as + // the vector types are divisible by 128. + if (!Subtarget->hasNEON() || + !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable)) + return false; + + if (UseScalable) + return false; + + std::deque Shuffles; + Shuffles.push_back(SVI); + unsigned ConcatLevel = Factor; + // Getting all the interleaved operands. + while (ConcatLevel > 1) { + unsigned InterleavedOperands = Shuffles.size(); + for (unsigned i = 0; i < InterleavedOperands; i++) { + ShuffleVectorInst *SFL = dyn_cast(Shuffles.front()); + if (!SFL) + return false; + Shuffles.pop_front(); + + Value *Op0 = SFL->getOperand(0); + Value *Op1 = SFL->getOperand(1); + + Shuffles.push_back(dyn_cast(Op0)); + Shuffles.push_back(dyn_cast(Op1)); + } + ConcatLevel >>= 1; + } + + IRBuilder<> Builder(SI); + auto Mask = createInterleaveMask(LaneLen, 2); + SmallVector UpperHalfMask(LaneLen), LowerHalfMask(LaneLen); + for (unsigned i = 0; i < LaneLen; i++) { + LowerHalfMask[i] = Mask[i]; + UpperHalfMask[i] = Mask[i + LaneLen]; + } + + unsigned InterleaveFactor = Factor >> 1; + while (InterleaveFactor >= MaxSupportedFactor) { + std::deque ShufflesIntermediate; + ShufflesIntermediate.resize(Factor); + for (unsigned j = 0; j < Factor; j += (InterleaveFactor * 2)) { + for (unsigned i = 0; i < InterleaveFactor; i++) { + auto *Shuffle = Builder.CreateShuffleVector( + Shuffles[i + j], Shuffles[i + j + InterleaveFactor], LowerHalfMask); + ShufflesIntermediate[i + j] = Shuffle; + Shuffle = Builder.CreateShuffleVector( + Shuffles[i + j], Shuffles[i + j + InterleaveFactor], UpperHalfMask); + ShufflesIntermediate[i + j + InterleaveFactor] = Shuffle; + } + } + Shuffles = ShufflesIntermediate; + InterleaveFactor >>= 1; + } + + Type *PtrTy = SI->getPointerOperandType(); + auto *STVTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen); + + Value *BaseAddr = SI->getPointerOperand(); + Function *StNFunc = getStructuredStoreFunction( + SI->getModule(), MaxSupportedFactor, UseScalable, STVTy, PtrTy); + for (unsigned i = 0; i < (Factor / MaxSupportedFactor); i++) { + SmallVector Ops; + for (unsigned j = 0; j < MaxSupportedFactor; j++) + Ops.push_back(Shuffles[i * MaxSupportedFactor + j]); + + if (i > 0) { + // We will compute the pointer operand of each store from the original + // base address using GEPs. Cast the base address to a pointer to the + // scalar element type. + BaseAddr = Builder.CreateConstGEP1_32( + SubVecTy->getElementType(), BaseAddr, LaneLen * MaxSupportedFactor); + } + Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy)); + Builder.CreateCall(StNFunc, Ops); + } + return true; +} + bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( Instruction *Load, Value *Mask, IntrinsicInst *DI) const { const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 70bfae717fb76..bfd8474bfeec9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -229,6 +229,10 @@ class AArch64TargetLowering : public TargetLowering { bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override; + bool isProfitableToInterleaveWithGatherScatter() const override { + return true; + } + unsigned getMaxSupportedInterleaveFactor() const override { return 4; } bool lowerInterleavedLoad(Instruction *Load, Value *Mask, @@ -239,6 +243,9 @@ class AArch64TargetLowering : public TargetLowering { ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override; + bool lowerInterleavedStoreWithShuffle(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const; + bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, IntrinsicInst *DI) const override; diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 30b7b03f7a69a..52b216c7fe0f0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_PMULL : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src1, type1:$src2); + let hasSideEffects = 0; +} + def G_UADDLP : AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); @@ -273,6 +279,7 @@ def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 197aae6e03cb1..8729ed3890131 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4922,11 +4922,36 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost( if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps)) return InstructionCost::getInvalid(); - if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) { + unsigned NumLoadStores = 1; + InstructionCost ShuffleCost = 0; + bool isInterleaveWithShuffle = false; + unsigned MaxSupportedFactor = TLI->getMaxSupportedInterleaveFactor(); + + auto *SubVecTy = + VectorType::get(VecVTy->getElementType(), + VecVTy->getElementCount().divideCoefficientBy(Factor)); + + if (TLI->isProfitableToInterleaveWithGatherScatter() && + Opcode == Instruction::Store && (0 == Factor % MaxSupportedFactor) && + Factor > MaxSupportedFactor) { + isInterleaveWithShuffle = true; + SmallVector Mask; + // preparing interleave Mask. + for (unsigned i = 0; i < VecVTy->getElementCount().getKnownMinValue() / 2; + i++) { + for (unsigned j = 0; j < 2; j++) + Mask.push_back(j * Factor + i); + } + + NumLoadStores = Factor / MaxSupportedFactor; + ShuffleCost = + (Factor * getShuffleCost(TargetTransformInfo::SK_Splice, VecVTy, VecVTy, + Mask, CostKind, 0, SubVecTy)); + } + + if (!UseMaskForGaps && + (Factor <= MaxSupportedFactor || isInterleaveWithShuffle)) { unsigned MinElts = VecVTy->getElementCount().getKnownMinValue(); - auto *SubVecTy = - VectorType::get(VecVTy->getElementType(), - VecVTy->getElementCount().divideCoefficientBy(Factor)); // ldN/stN only support legal vector types of size 64 or 128 in bits. // Accesses having vector types that are a multiple of 128 bits can be @@ -4934,7 +4959,10 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost( bool UseScalable; if (MinElts % Factor == 0 && TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable)) - return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable); + return (Factor * + TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable) * + NumLoadStores) + + ShuffleCost; } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5f93847bc680e..038ad77ae69b2 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1809,6 +1809,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerBinOp(TargetOpcode::G_FMAXNUM); case Intrinsic::aarch64_neon_fminnm: return LowerBinOp(TargetOpcode::G_FMINNUM); + case Intrinsic::aarch64_neon_pmull: + case Intrinsic::aarch64_neon_pmull64: + return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_smull: return LowerBinOp(AArch64::G_SMULL); case Intrinsic::aarch64_neon_umull: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 6d2d70511e894..6b920f05227ad 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -560,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, case TargetOpcode::G_FCMP: case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: + case AArch64::G_PMULL: return true; case TargetOpcode::G_INTRINSIC: switch (cast(MI).getIntrinsicID()) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 54d94b1f8682e..4fe194c813c46 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2366,6 +2366,18 @@ def isGFX8GFX9NotGFX90A : " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; +// Pre-90A GFX9s allow the NV bit in FLAT instructions. +def isNVAllowedInFlat : + Predicate<"!Subtarget->hasGFX90AInsts() &&" + " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, + AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>; + +// GFX8 or GFX90A+ do not allow the NV bit in FLAT instructions. +def isNVNotAllowedInFlat : + Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||" + " ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">, + AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>; + def isGFX90AOnly : Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index 0a5913293238a..fdff21b6ef8df 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1565,8 +1565,11 @@ void SplitPtrStructs::processConditionals() { } else if (isa(I)) { if (MaybeRsrc) { if (auto *RsrcInst = dyn_cast(Rsrc)) { - ConditionalTemps.push_back(RsrcInst); - RsrcInst->replaceAllUsesWith(*MaybeRsrc); + // Guard against conditionals that were already folded away. + if (RsrcInst != *MaybeRsrc) { + ConditionalTemps.push_back(RsrcInst); + RsrcInst->replaceAllUsesWith(*MaybeRsrc); + } } for (Value *V : Seen) FoundRsrcs[V] = *MaybeRsrc; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 09338c533fdf2..2808c44c59c11 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1602,6 +1602,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); } + bool isFlatInstAndNVAllowed(const MCInst &Inst) const { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + return (TSFlags & SIInstrFlags::FLAT) && isGFX9() && !isGFX90A(); + } + AMDGPUTargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast(TS); @@ -5370,7 +5375,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]); Error(S, "scale_offset is not supported on this GPU"); } - if (CPol & CPol::NV) { + if ((CPol & CPol::NV) && !isFlatInstAndNVAllowed(Inst)) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]); @@ -7145,6 +7150,13 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { unsigned Enabled = 0, Seen = 0; for (;;) { SMLoc S = getLoc(); + + if (isGFX9() && trySkipId("nv")) { + Enabled |= CPol::NV; + Seen |= CPol::NV; + continue; + } + bool Disabling; unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); if (!CPol) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 21b339f2c6784..95bc9438441b9 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -125,7 +125,7 @@ class FLAT_Real op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : bits<7> saddr; bits<10> vdst; - bits<5> cpol; + bits<6> cpol; // Only valid on gfx9 bits<1> lds = ps.lds; // LDS DMA for global and scratch @@ -2759,29 +2759,52 @@ class FLAT_Real_vi op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); } +class FLAT_Real_vi_ex_gfx9 op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : + FLAT_Real_vi { + let AssemblerPredicate = isNVNotAllowedInFlat; +} + +class FLAT_Real_gfx9 op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : + FLAT_Real_vi { + let AssemblerPredicate = isNVAllowedInFlat; + let Subtarget = SIEncodingFamily.GFX9; + let DecoderNamespace = "GFX9"; + let Inst{55} = cpol{CPolBit.NV}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit. +} + +multiclass FLAT_Real_mc_vi op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> { + def _vi: FLAT_Real_vi_ex_gfx9; + def _gfx9: FLAT_Real_gfx9; +} + multiclass FLAT_Real_AllAddr_vi op, bit has_sccb = !cast(NAME).has_sccb> { - def _vi : FLAT_Real_vi(NAME), has_sccb>; - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb>; + defm "" : FLAT_Real_mc_vi(NAME), has_sccb>; + defm _SADDR : FLAT_Real_mc_vi(NAME#"_SADDR"), has_sccb>; +} + +multiclass FLAT_Real_AllAddr_vi_ex_gfx9 op, + bit has_sccb = !cast(NAME).has_sccb> { + def _vi : FLAT_Real_vi_ex_gfx9(NAME), has_sccb>; + def _SADDR_vi : FLAT_Real_vi_ex_gfx9(NAME#"_SADDR"), has_sccb>; } class FLAT_Real_gfx940 op, FLAT_Pseudo ps> : FLAT_Real , SIMCInstr { let AssemblerPredicate = isGFX940Plus; - let DecoderNamespace = "GFX9"; + let DecoderNamespace = "GFX940"; let Inst{13} = ps.sve; let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); } multiclass FLAT_Real_AllAddr_SVE_vi op> { - def _vi : FLAT_Real_vi(NAME)> { - let AssemblerPredicate = isGFX8GFX9NotGFX940; - let OtherPredicates = [isGFX8GFX9NotGFX940]; - } - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR")> { - let DecoderNamespace = "GFX9"; + let OtherPredicates = [isGFX8GFX9NotGFX940] in { + defm "" : FLAT_Real_mc_vi(NAME)>; } + + defm _SADDR_vi : FLAT_Real_mc_vi(NAME#"_SADDR")>; + let AssemblerPredicate = isGFX940Plus in { def _VE_gfx940 : FLAT_Real_gfx940(NAME)>; def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; @@ -2794,11 +2817,11 @@ multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx940_op, bit has_sccb = !cast(NAME).has_sccb> { let OtherPredicates = [isGFX8GFX9NotGFX940] in { - def _vi : FLAT_Real_vi(NAME), has_sccb> { - let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds"; + let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds" in { + defm "" : FLAT_Real_mc_vi(NAME), has_sccb>; } - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { - let AsmString = pre_gfx940_name # !cast(NAME#"_SADDR").AsmOperands # " lds"; + let AsmString = pre_gfx940_name # !cast(NAME#"_SADDR").AsmOperands # " lds" in { + defm _SADDR : FLAT_Real_mc_vi(NAME#"_SADDR"), has_sccb>; } } @@ -2814,47 +2837,66 @@ multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx940_op> { def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; } -def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; -def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; -def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; -def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; -def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; -def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; -def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; -def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; - -def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; -def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; -def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; -def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; -def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; -def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; -def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; -def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; - -def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; -def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; -def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; -def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; -def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; -def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; +defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>; +defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>; +defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>; +defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>; +defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>; +defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>; +defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>; +defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>; + +defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>; +defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>; +defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>; +defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; +defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>; +defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>; +defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>; +defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>; + +defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>; +defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; +defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>; +defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; +defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>; +defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; multiclass FLAT_Real_Atomics_vi op, bit has_sccb = !cast(NAME).has_sccb> { defvar ps = !cast(NAME); - def _vi : FLAT_Real_vi(ps.PseudoInstr), has_sccb>; - def _RTN_vi : FLAT_Real_vi(ps.PseudoInstr # "_RTN"), has_sccb>; - def _RTN_agpr_vi : FLAT_Real_vi(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; + defm "" : FLAT_Real_mc_vi(ps.PseudoInstr), has_sccb>; + defm _RTN : FLAT_Real_mc_vi(ps.PseudoInstr # "_RTN"), has_sccb>; + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; +} + +multiclass FLAT_Real_Atomics_vi_ex_gfx9 op, + bit has_sccb = !cast(NAME).has_sccb> { + defvar ps = !cast(NAME); + def _vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr), has_sccb>; + def _RTN_vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr # "_RTN"), has_sccb>; + + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9(ps.PseudoInstr # "_RTN_agpr"), has_sccb>; } multiclass FLAT_Global_Real_Atomics_vi op, bit has_sccb = !cast(NAME).has_sccb> : FLAT_Real_AllAddr_vi { - def _RTN_vi : FLAT_Real_vi (NAME#"_RTN"), has_sccb>; - def _SADDR_RTN_vi : FLAT_Real_vi (NAME#"_SADDR_RTN"), has_sccb>; + defm _RTN : FLAT_Real_mc_vi (NAME#"_RTN"), has_sccb>; + defm _SADDR_RTN : FLAT_Real_mc_vi (NAME#"_SADDR_RTN"), has_sccb>; + + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_RTN_agpr"), has_sccb>; + def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_SADDR_RTN_agpr"), has_sccb>; +} + +multiclass FLAT_Global_Real_Atomics_vi_ex_gfx9 op, + bit has_sccb = !cast(NAME).has_sccb> : + FLAT_Real_AllAddr_vi_ex_gfx9 { + def _RTN_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_RTN"), has_sccb>; + def _SADDR_RTN_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_SADDR_RTN"), has_sccb>; - def _RTN_agpr_vi : FLAT_Real_vi (NAME#"_RTN_agpr"), has_sccb>; - def _SADDR_RTN_agpr_vi : FLAT_Real_vi (NAME#"_SADDR_RTN_agpr"), has_sccb>; + def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_RTN_agpr"), has_sccb>; + def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 (NAME#"_SADDR_RTN_agpr"), has_sccb>; } defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>; @@ -3016,10 +3058,10 @@ let AssemblerPredicate = isGFX940Plus in { defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; - defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>; - defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>; - defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>; - defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; + defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_ex_gfx9<0x4d>; + defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_ex_gfx9<0x4e>; + defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_ex_gfx9<0x52>; + defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_ex_gfx9<0x52>; } // End AssemblerPredicate = isGFX940Plus //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 703ec0a4befa5..3e6f35dbf5e54 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -186,8 +186,12 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, O << " dlc"; if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI)) O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc"); - if (Imm & ~CPol::ALL_pregfx12) - O << " /* unexpected cache policy bit */"; + if (Imm & ~CPol::ALL_pregfx12) { + if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI)) + O << " nv"; + else + O << " /* unexpected cache policy bit */"; + } } void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope, diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index eb4c8846441a2..677203d1c016b 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -285,6 +285,13 @@ void DXContainerGlobals::addPipelineStateValidationInfo( PSV.BaseData.NumThreadsX = MMI.EntryPropertyVec[0].NumThreadsX; PSV.BaseData.NumThreadsY = MMI.EntryPropertyVec[0].NumThreadsY; PSV.BaseData.NumThreadsZ = MMI.EntryPropertyVec[0].NumThreadsZ; + if (MMI.EntryPropertyVec[0].WaveSizeMin) { + PSV.BaseData.MinimumWaveLaneCount = MMI.EntryPropertyVec[0].WaveSizeMin; + PSV.BaseData.MaximumWaveLaneCount = + MMI.EntryPropertyVec[0].WaveSizeMax + ? MMI.EntryPropertyVec[0].WaveSizeMax + : MMI.EntryPropertyVec[0].WaveSizeMin; + } break; default: break; diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index cf8b833b3e42e..e1a472fe57642 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -82,6 +82,7 @@ enum class EntryPropsTag { ASStateTag, WaveSize, EntryRootSig, + WaveRange = 23, }; } // namespace @@ -177,14 +178,15 @@ getTagValueAsMetadata(EntryPropsTag Tag, uint64_t Value, LLVMContext &Ctx) { case EntryPropsTag::ASStateTag: case EntryPropsTag::WaveSize: case EntryPropsTag::EntryRootSig: + case EntryPropsTag::WaveRange: llvm_unreachable("NYI: Unhandled entry property tag"); } return MDVals; } -static MDTuple * -getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, - const Triple::EnvironmentType ShaderProfile) { +static MDTuple *getEntryPropAsMetadata(Module &M, const EntryProperties &EP, + uint64_t EntryShaderFlags, + const ModuleMetadataInfo &MMDI) { SmallVector MDVals; LLVMContext &Ctx = EP.Entry->getContext(); if (EntryShaderFlags != 0) @@ -195,12 +197,13 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, // FIXME: support more props. // See https://github.com/llvm/llvm-project/issues/57948. // Add shader kind for lib entries. - if (ShaderProfile == Triple::EnvironmentType::Library && + if (MMDI.ShaderProfile == Triple::EnvironmentType::Library && EP.ShaderStage != Triple::EnvironmentType::Library) MDVals.append(getTagValueAsMetadata(EntryPropsTag::ShaderKind, getShaderStage(EP.ShaderStage), Ctx)); if (EP.ShaderStage == Triple::EnvironmentType::Compute) { + // Handle mandatory "hlsl.numthreads" MDVals.emplace_back(ConstantAsMetadata::get(ConstantInt::get( Type::getInt32Ty(Ctx), static_cast(EntryPropsTag::NumThreads)))); Metadata *NumThreadVals[] = {ConstantAsMetadata::get(ConstantInt::get( @@ -210,8 +213,48 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, ConstantAsMetadata::get(ConstantInt::get( Type::getInt32Ty(Ctx), EP.NumThreadsZ))}; MDVals.emplace_back(MDNode::get(Ctx, NumThreadVals)); + + // Handle optional "hlsl.wavesize". The fields are optionally represented + // if they are non-zero. + if (EP.WaveSizeMin != 0) { + bool IsWaveRange = VersionTuple(6, 8) <= MMDI.ShaderModelVersion; + bool IsWaveSize = + !IsWaveRange && VersionTuple(6, 6) <= MMDI.ShaderModelVersion; + + if (!IsWaveRange && !IsWaveSize) { + reportError(M, "Shader model 6.6 or greater is required to specify " + "the \"hlsl.wavesize\" function attribute"); + return nullptr; + } + + // A range is being specified if EP.WaveSizeMax != 0 + if (EP.WaveSizeMax && !IsWaveRange) { + reportError( + M, "Shader model 6.8 or greater is required to specify " + "wave size range values of the \"hlsl.wavesize\" function " + "attribute"); + return nullptr; + } + + EntryPropsTag Tag = + IsWaveSize ? EntryPropsTag::WaveSize : EntryPropsTag::WaveRange; + MDVals.emplace_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), static_cast(Tag)))); + + SmallVector WaveSizeVals = {ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMin))}; + if (IsWaveRange) { + WaveSizeVals.push_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMax))); + WaveSizeVals.push_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizePref))); + } + + MDVals.emplace_back(MDNode::get(Ctx, WaveSizeVals)); + } } } + if (MDVals.empty()) return nullptr; return MDNode::get(Ctx, MDVals); @@ -236,12 +279,11 @@ static MDTuple *constructEntryMetadata(const Function *EntryFn, return MDNode::get(Ctx, MDVals); } -static MDTuple *emitEntryMD(const EntryProperties &EP, MDTuple *Signatures, - MDNode *MDResources, +static MDTuple *emitEntryMD(Module &M, const EntryProperties &EP, + MDTuple *Signatures, MDNode *MDResources, const uint64_t EntryShaderFlags, - const Triple::EnvironmentType ShaderProfile) { - MDTuple *Properties = - getEntryPropAsMetadata(EP, EntryShaderFlags, ShaderProfile); + const ModuleMetadataInfo &MMDI) { + MDTuple *Properties = getEntryPropAsMetadata(M, EP, EntryShaderFlags, MMDI); return constructEntryMetadata(EP.Entry, Signatures, MDResources, Properties, EP.Entry->getContext()); } @@ -523,10 +565,8 @@ static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM, Twine(Triple::getEnvironmentTypeName(MMDI.ShaderProfile) + "'")); } - - EntryFnMDNodes.emplace_back(emitEntryMD(EntryProp, Signatures, ResourceMD, - EntryShaderFlags, - MMDI.ShaderProfile)); + EntryFnMDNodes.emplace_back(emitEntryMD( + M, EntryProp, Signatures, ResourceMD, EntryShaderFlags, MMDI)); } NamedMDNode *EntryPointsNamedMD = diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp index 479ac90b7d526..f29a739cb5c07 100644 --- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -104,13 +104,6 @@ const std::map QFPInstMap{ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; } // namespace -namespace llvm { - -FunctionPass *createHexagonQFPOptimizer(); -void initializeHexagonQFPOptimizerPass(PassRegistry &); - -} // namespace llvm - namespace { struct HexagonQFPOptimizer : public MachineFunctionPass { diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index 0c2e44e18f463..dfbbba0116f25 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -420,6 +420,9 @@ let Predicates = [HasVSX, IsISAFuture] in { : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), "vucmprlh $VRT, $VRA, $VRB", []>; + def XVRLW: XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvrlw $XT, $XA, $XB", []>; + // AES Acceleration Instructions def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp), (ins vsrprc:$XAp, vsrprc:$XBp, u2imm:$M), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 45b0e7dc12263..f3c236ca8c9ce 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -532,13 +532,19 @@ struct StaticLibcallNameMap { // FIXME: This is broken if there are ever different triples compiled with // different libcalls. RTLIB::RuntimeLibcallsInfo RTCI(TT); - for (RTLIB::Libcall LC : RTLIB::libcalls()) { - StringRef NameLibcall = RTCI.getLibcallName(LC); - if (!NameLibcall.empty() && - getRuntimeLibcallSignatures().Table[LC] != unsupported) { - assert(!Map.contains(NameLibcall) && - "duplicate libcall names in name map"); - Map[NameLibcall] = LC; + + ArrayRef Table = + getRuntimeLibcallSignatures().Table; + for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) { + if (!RTCI.isAvailable(Impl)) + continue; + RTLIB::Libcall LC = RTLIB::RuntimeLibcallsInfo::getLibcallFromImpl(Impl); + if (Table[LC] != unsupported) { + StringRef NameLibcall = + RTLIB::RuntimeLibcallsInfo::getLibcallImplName(Impl); + // FIXME: Map should be to LibcallImpl + if (!Map.insert({NameLibcall, LC}).second) + llvm_unreachable("duplicate libcall names in name map"); } } } diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp index bd4d4ebd2a729..5977a276b1236 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp @@ -320,7 +320,7 @@ XtensaMCCodeEmitter::getMemRegEncoding(const MCInst &MI, unsigned OpNo, case Xtensa::SSIP: case Xtensa::LSI: case Xtensa::LSIP: - + case Xtensa::S32C1I: if (Res & 0x3) { report_fatal_error("Unexpected operand value!"); } diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp index 4e730707dcb78..8d0fd078b2696 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp @@ -202,7 +202,7 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits, return FeatureBits[Xtensa::FeatureWindowed]; case Xtensa::ATOMCTL: case Xtensa::SCOMPARE1: - return FeatureBits[Xtensa::FeatureWindowed]; + return FeatureBits[Xtensa::FeatureS32C1I]; case Xtensa::NoRegister: return false; } diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp index b0f924f2cd58e..be69cefb5b78f 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp @@ -114,14 +114,31 @@ void XtensaInstrInfo::copyPhysReg(MachineBasicBlock &MBB, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest, bool RenamableSrc) const { - // The MOV instruction is not present in core ISA, + unsigned Opcode; + + // The MOV instruction is not present in core ISA for AR registers, // so use OR instruction. - if (Xtensa::ARRegClass.contains(DestReg, SrcReg)) + if (Xtensa::ARRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MBBI, DL, get(Xtensa::OR), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (STI.hasSingleFloat() && Xtensa::FPRRegClass.contains(SrcReg) && + Xtensa::FPRRegClass.contains(DestReg)) + Opcode = Xtensa::MOV_S; + else if (STI.hasSingleFloat() && Xtensa::FPRRegClass.contains(SrcReg) && + Xtensa::ARRegClass.contains(DestReg)) + Opcode = Xtensa::RFR; + else if (STI.hasSingleFloat() && Xtensa::ARRegClass.contains(SrcReg) && + Xtensa::FPRRegClass.contains(DestReg)) + Opcode = Xtensa::WFR; else report_fatal_error("Impossible reg-to-reg copy"); + + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } void XtensaInstrInfo::storeRegToStackSlot( diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 7a95df4b2a47c..b575d76e897d2 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1378,8 +1378,7 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N); // We can't know the precise weights here, as they would depend on the value // distribution of Call->getArgOperand(1). So we just mark it as "unknown". - setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, DEBUG_TYPE); Type *IndexTy = DL.getIndexType(Call->getType()); SmallVector Updates; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index d85e4f7590197..9bdd8cb71f7f3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -479,7 +479,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr); - setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, DEBUG_TYPE, &F); return Sel; } diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 0577ddbd2353c..0f3e66476f055 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -330,8 +330,7 @@ static void buildPartialUnswitchConditionalBranch( HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof) : nullptr); if (!HasBranchWeights) - setExplicitlyUnknownBranchWeightsIfProfiled( - *BR, *BR->getParent()->getParent(), DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE); } /// Copy a set of loop invariant values, and conditionally branch on them. @@ -389,8 +388,7 @@ static void buildPartialInvariantUnswitchConditionalBranch( IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? &NormalSucc : &UnswitchedSucc, ProfData); if (!ProfData) - setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE); } /// Rewrite the PHI nodes in an unswitched loop exit basic block. @@ -3204,8 +3202,7 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, auto *InvariantBr = Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock); // We don't know anything about the relation between the limits. - setExplicitlyUnknownBranchWeightsIfProfiled( - *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*InvariantBr, DEBUG_TYPE); Builder.SetInsertPoint(CheckBlock); Builder.CreateCondBr( diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp index 6d4436b92c119..dd8706cfb2855 100644 --- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp +++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp @@ -54,8 +54,8 @@ PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M, const DataLayout &DL = M.getDataLayout(); const Triple &TT = M.getTargetTriple(); - for (RTLIB::LibcallImpl Impl : RTLCI.getLibcallImpls()) { - if (Impl == RTLIB::Unsupported) + for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) { + if (!RTLCI.isAvailable(Impl)) continue; auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(Ctx, TT, DL, Impl); diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index ec2e6c1ab796b..9c8b6ef83e56d 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -109,8 +110,12 @@ void LoopVersioning::versionLoop( // Insert the conditional branch based on the result of the memchecks. Instruction *OrigTerm = RuntimeCheckBB->getTerminator(); Builder.SetInsertPoint(OrigTerm); - Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(), - VersionedLoop->getLoopPreheader()); + auto *BI = + Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader()); + // We don't know what the probability of executing the versioned vs the + // unversioned variants is. + setExplicitlyUnknownBranchWeightsIfProfiled(*BI, DEBUG_TYPE); OrigTerm->eraseFromParent(); // The loops merge in the original exit block. This is now dominated by the diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3a3e3ade20212..37c048f421f1a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5214,8 +5214,7 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, // We don't have any info about this condition. auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB) : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE); OldTI->eraseFromParent(); @@ -7732,19 +7731,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, // label. The other is those powers of 2 that don't appear in the case // statement. We don't know the distribution of the values coming in, so // the safest is to split 50-50 the original probability to `default`. - uint64_t OrigDenominator = sum_of(map_range( - Weights, [](const auto &V) { return static_cast(V); })); + uint64_t OrigDenominator = + sum_of(map_range(Weights, StaticCastTo)); SmallVector NewWeights(2); NewWeights[1] = Weights[0] / 2; NewWeights[0] = OrigDenominator - NewWeights[1]; setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false); - - // For the original switch, we reduce the weight of the default by the - // amount by which the previous branch contributes to getting to default, - // and then make sure the remaining weights have the same relative ratio - // wrt eachother. + // The probability of executing the default block stays constant. It was + // p_d = Weights[0] / OrigDenominator + // we rewrite as W/D + // We want to find the probability of the default branch of the switch + // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D) + // i.e. the original probability is the probability we go to the default + // branch from the BI branch, or we take the default branch on the SI. + // Meaning X = W / (2D - W), or (W/2) / (D - W/2) + // This matches using W/2 for the default branch probability numerator and + // D-W/2 as the denominator. + Weights[0] = NewWeights[1]; uint64_t CasesDenominator = OrigDenominator - Weights[0]; - Weights[0] /= 2; for (auto &W : drop_begin(Weights)) W = NewWeights[0] * static_cast(W) / CasesDenominator; diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 0cd885e599817..e85e808921c87 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -1,10 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1 +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull_v8i8_v8i16: @@ -1832,14 +1829,33 @@ entry: } define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { } define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: add v0.8h, v3.8h, v0.8h -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q2, q3, [x1] +; CHECK-GI-NEXT: mov d4, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v3.8b, v3.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b +; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 2a8b3ce2ae10b..8cb319b2c3368 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -1,11 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64 +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 @@ -2721,14 +2716,24 @@ entry: } define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { -; CHECK-LABEL: test_vmull_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 @@ -2736,12 +2741,22 @@ entry: } define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { -; CHECK-LABEL: test_vmull_high_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_high_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_high_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: %0 = extractelement <2 x i64> %a, i32 1 %1 = extractelement <2 x i64> %b, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index e6df9f2fb2c56..90abc7d389c13 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -2,44 +2,35 @@ ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64 +; CHECK-GI: warning: Instruction selection used fallback path for sqdmulh_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull8h: @@ -2895,11 +2886,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) { } define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) { -; CHECK-LABEL: pmull_from_extract_dup_high: -; CHECK: // %bb.0: -; CHECK-NEXT: dup v1.16b, w0 -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_dup_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup v1.16b, w0 +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_dup_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v1.8b, w0 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> @@ -2924,12 +2922,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) } define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) { -; CHECK-LABEL: pmull_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: dup v1.16b, v1.b[0] -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: dup v1.16b, v1.b[0] +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: dup v1.8b, v1.b[0] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> @@ -3245,21 +3251,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { } define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val } define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { -; CHECK-LABEL: test_pmull_high_64: -; CHECK: // %bb.0: -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_high_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi) @@ -3267,13 +3287,22 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { } define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_commutable_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: add v0.16b, v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_commutable_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_commutable_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b +; CHECK-GI-NEXT: ret %1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) %2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l) %3 = add <16 x i8> %1, %2 diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll index df4889b6f09de..bd6c168ce8776 100644 --- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll +++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll @@ -1,10 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes CHECK,CHECK-LE ; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE -; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64 +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) @@ -521,12 +518,12 @@ entry: } define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { -; CHECK-LABEL: test_pmull_high_p8_128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: fmov d1, x1 -; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_pmull_high_p8_128: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: fmov d0, x3 +; CHECK-LE-NEXT: fmov d1, x1 +; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b +; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test_pmull_high_p8_128: ; CHECK-BE: // %bb.0: // %entry @@ -538,6 +535,15 @@ define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_p8_128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov v0.d[0], x0 +; CHECK-GI-NEXT: mov v1.d[0], x2 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: mov v1.d[1], x3 +; CHECK-GI-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: ret entry: %a = bitcast i128 %aa to <16 x i8> %b = bitcast i128 %bb to <16 x i8> diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index 3685e9cf85bd6..b2635d3d9f1a5 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -730,6 +730,111 @@ entry: ret void } +define void @store_factor8(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3, + <4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7) { +; CHECK-LABEL: store_factor8: +; CHECK: .Lfunc_begin17: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK: zip1 [[V1:.*s]], [[I1:.*s]], [[I5:.*s]] +; CHECK-NEXT: zip2 [[V5:.*s]], [[I1]], [[I5]] +; CHECK-NEXT: zip1 [[V2:.*s]], [[I2:.*s]], [[I6:.*s]] +; CHECK-NEXT: zip2 [[V6:.*s]], [[I2]], [[I6]] +; CHECK-NEXT: zip1 [[V3:.*s]], [[I3:.*s]], [[I7:.*s]] +; CHECK-NEXT: zip2 [[V7:.*s]], [[I3]], [[I7]] +; CHECK-NEXT: zip1 [[V4:.*s]], [[I4:.*s]], [[I8:.*s]] +; CHECK-NEXT: zip2 [[V8:.*s]], [[I4]], [[I8]] +; CHECK-NEXT: st4 { [[V1]], [[V2]], [[V3]], [[V4]] }, [x0], #64 +; CHECK-NEXT: st4 { [[V5]], [[V6]], [[V7]], [[V8]] }, [x0] +; CHECK-NEXT: ret + + %v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> + %v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> + %v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> + %v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> + + %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> + %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> + + %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> + store <32 x i32> %interleaved.vec, ptr %ptr, align 4 + ret void +} + +define void @store_factor16(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3, + <4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7, + <4 x i32> %a8, <4 x i32> %a9, <4 x i32> %a10, <4 x i32> %a11, + <4 x i32> %a12, <4 x i32> %a13, <4 x i32> %a14, <4 x i32> %a15) { +; CHECK-LABEL: store_factor16: +; CHECK: .Lfunc_begin18: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK: zip1 [[V05:.*s]], [[I05:.*s]], [[I13:.*s]] +; CHECK-NEXT: zip1 [[V01:.*s]], [[I01:.*s]], [[I09:.*s]] +; CHECK-NEXT: zip1 [[V02:.*s]], [[I02:.*s]], [[I10:.*s]] +; CHECK-NEXT: zip1 [[V06:.*s]], [[I06:.*s]], [[I14:.*s]] +; CHECK-NEXT: zip1 [[V07:.*s]], [[I07:.*s]], [[I15:.*s]] +; CHECK-NEXT: zip2 [[V09:.*s]], [[I01]], [[I09]] +; CHECK-NEXT: zip2 [[V13:.*s]], [[I05]], [[I13]] +; CHECK-NEXT: zip1 [[V03:.*s]], [[I03:.*s]], [[I11:.*s]] +; CHECK-NEXT: zip1 [[V04:.*s]], [[I04:.*s]], [[I12:.*s]] +; CHECK-NEXT: zip1 [[V08:.*s]], [[I08:.*s]], [[I16:.*s]] +; CHECK-NEXT: zip2 [[V10:.*s]], [[I02]], [[I10]] +; CHECK-NEXT: zip2 [[V14:.*s]], [[I06]], [[I14]] +; CHECK-NEXT: zip2 [[V11:.*s]], [[I03]], [[I11]] +; CHECK-NEXT: zip1 [[V17:.*s]], [[V01]], [[V05]] +; CHECK-NEXT: zip2 [[V15:.*s]], [[I07]], [[I15]] +; CHECK-NEXT: zip2 [[V21:.*s]], [[V01]], [[V05]] +; CHECK-NEXT: zip1 [[V18:.*s]], [[V02]], [[V06]] +; CHECK-NEXT: zip2 [[V12:.*s]], [[I04]], [[I12]] +; CHECK-NEXT: zip2 [[V16:.*s]], [[I08]], [[I16]] +; CHECK-NEXT: zip1 [[V19:.*s]], [[V03]], [[V07]] +; CHECK-NEXT: zip2 [[V22:.*s]], [[V02]], [[V06]] +; CHECK-NEXT: zip1 [[V25:.*s]], [[V09]], [[V13]] +; CHECK-NEXT: zip1 [[V20:.*s]], [[V04]], [[V08]] +; CHECK-NEXT: zip2 [[V23:.*s]], [[V03]], [[V07]] +; CHECK-NEXT: zip1 [[V26:.*s]], [[V10]], [[V14]] +; CHECK-NEXT: zip2 [[V29:.*s]], [[V09]], [[V13]] +; CHECK-NEXT: zip2 [[V24:.*s]], [[V04]], [[V08]] +; CHECK-NEXT: zip1 [[V27:.*s]], [[V11]], [[V15]] +; CHECK-NEXT: zip2 [[V30:.*s]], [[V10]], [[V14]] +; CHECK-NEXT: zip1 [[V28:.*s]], [[V12]], [[V16]] +; CHECK-NEXT: zip2 [[V31:.*s]], [[V11]], [[V15]] +; CHECK-NEXT: zip2 [[V32:.*s]], [[V12]], [[V16]] +; CHECK-NEXT: st4 { [[V17]], [[V18]], [[V19]], [[V20]] }, [x8], #64 +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: st4 { [[V21]], [[V22]], [[V23]], [[V24]] }, [x8] +; CHECK-NEXT: add x8, x0, #128 +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: st4 { [[V25]], [[V26]], [[V27]], [[V28]] }, [x8] +; CHECK-NEXT: add x8, x0, #192 +; CHECK-NEXT: st4 { [[V29]], [[V30]], [[V31]], [[V32]] }, [x8] +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret + + %v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> + %v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> + %v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> + %v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> + %v4 = shufflevector <4 x i32> %a8, <4 x i32> %a9, <8 x i32> + %v5 = shufflevector <4 x i32> %a10, <4 x i32> %a11, <8 x i32> + %v6 = shufflevector <4 x i32> %a12, <4 x i32> %a13, <8 x i32> + %v7 = shufflevector <4 x i32> %a14, <4 x i32> %a15, <8 x i32> + + %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> + %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> + %s2 = shufflevector <8 x i32> %v4, <8 x i32> %v5, <16 x i32> + %s3 = shufflevector <8 x i32> %v6, <8 x i32> %v7, <16 x i32> + + %d0 = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> + %d1 = shufflevector <16 x i32> %s2, <16 x i32> %s3, <32 x i32> + + %interleaved.vec = shufflevector <32 x i32> %d0, <32 x i32> %d1, <64 x i32> + store <64 x i32> %interleaved.vec, ptr %ptr, align 4 + ret void +} + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll index 4fa7c29bfde02..71005224dd1e5 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll @@ -481,3 +481,15 @@ define void @dominance_not_in_program_order(ptr addrspace(7) inreg %arg) { %lsr.iv11 = phi ptr addrspace(7) [ %arg, %.loopexit ], [ %arg, %.preheader15 ] br label %.loopexit } + +;; iree-org/iree#22551 - crash on something that reduces to the below non-canonical select. +define ptr addrspace(7) @noncanonical_const_cond(ptr addrspace(7) %x) { +; CHECK-LABEL: define { ptr addrspace(8), i32 } @noncanonical_const_cond +; CHECK-SAME: ({ ptr addrspace(8), i32 } [[RET:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 0 +; CHECK-NEXT: [[X_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]] +; + %ret = select i1 false, ptr addrspace(7) %x, ptr addrspace(7) %x + ret ptr addrspace(7) %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/packetizer.ll b/llvm/test/CodeGen/AMDGPU/packetizer.ll index aab035f811434..b9bf13886d366 100644 --- a/llvm/test/CodeGen/AMDGPU/packetizer.ll +++ b/llvm/test/CodeGen/AMDGPU/packetizer.ll @@ -1,13 +1,49 @@ -; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s -; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s - -; CHECK: {{^}}test: -; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X -; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Y -; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z -; CHECK: BIT_ALIGN_INT * T{{[0-9]}}.W +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s -check-prefix=R600 +; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s -check-prefix=CM define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) { +; R600-LABEL: test: +; R600: ; %bb.0: ; %entry +; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: ADD_INT T0.Y, KC0[3].X, 1, +; R600-NEXT: ADD_INT T0.Z, KC0[3].Y, 1, +; R600-NEXT: ADD_INT T0.W, KC0[2].Z, 1, +; R600-NEXT: ADD_INT * T1.W, KC0[2].W, 1, +; R600-NEXT: BIT_ALIGN_INT T0.X, PS, PS, KC0[3].Z, +; R600-NEXT: BIT_ALIGN_INT T1.Y, PV.W, PV.W, KC0[3].Z, +; R600-NEXT: BIT_ALIGN_INT T0.Z, PV.Z, PV.Z, KC0[3].Z, +; R600-NEXT: BIT_ALIGN_INT * T0.W, PV.Y, PV.Y, KC0[3].Z, +; R600-NEXT: OR_INT T0.W, PV.W, PV.Z, +; R600-NEXT: OR_INT * T1.W, PV.Y, PV.X, +; R600-NEXT: OR_INT T0.X, PS, PV.W, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: test: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: ADD_INT T0.X, KC0[3].X, 1, +; CM-NEXT: ADD_INT T0.Y, KC0[3].Y, 1, +; CM-NEXT: ADD_INT T0.Z, KC0[2].Z, 1, +; CM-NEXT: ADD_INT * T0.W, KC0[2].W, 1, +; CM-NEXT: BIT_ALIGN_INT T1.X, PV.W, PV.W, KC0[3].Z, +; CM-NEXT: BIT_ALIGN_INT T1.Y, PV.Z, PV.Z, KC0[3].Z, +; CM-NEXT: BIT_ALIGN_INT T0.Z, PV.Y, PV.Y, KC0[3].Z, +; CM-NEXT: BIT_ALIGN_INT * T0.W, PV.X, PV.X, KC0[3].Z, +; CM-NEXT: OR_INT T0.Z, PV.W, PV.Z, +; CM-NEXT: OR_INT * T0.W, PV.Y, PV.X, +; CM-NEXT: OR_INT * T0.X, PV.W, PV.Z, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: %shl = sub i32 32, %e %x = add i32 %x_arg, 1 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll index cabd43edff9d6..9e243aec1128d 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll @@ -14,7 +14,6 @@ entry: } ; CHECK: _ZL10myCallbacki: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define internal void @_ZL10myCallbacki(i32 %value) !type !2 { entry: %sink = alloca i32, align 4 @@ -33,7 +32,7 @@ entry: ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. ; CHECK-NEXT: .byte 1 ;; Function Entry PC -; CHECK-NEXT: .long [[LABEL_FUNC]] +; CHECK-NEXT: .long _ZL10myCallbacki ;; Function type ID -5212364466660467813 ; CHECK-NEXT: .long 1154849691 ; CHECK-NEXT: .long 3081369122 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll index 3d3974ee6ba3b..8e8881ee722fb 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll @@ -11,7 +11,6 @@ declare !type !1 i32 @direct_bar(i8) declare !type !2 ptr @direct_baz(ptr) ; CHECK: ball: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define ptr @ball() { entry: call void @direct_foo() @@ -42,7 +41,7 @@ entry: ;; Flags ; CHECK-NEXT: .byte 7 ;; Function Entry PC -; CHECK-NEXT: .long [[LABEL_FUNC]] +; CHECK-NEXT: .long ball ;; Function type ID -- set to 0 as no type metadata attached to function. ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll index 80360041c106a..35e570bdde405 100644 --- a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll +++ b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll @@ -29,6 +29,6 @@ declare !type !2 i32 @bar(i8 signext) ; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00008e19 0b7f3326 e3000154 -; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05100000 00a150b8 +; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05000000 00a150b8 ;; Verify that the type id 0x308e4b8159bc8654 is in section. ; CHECK-NEXT: 0x00000020 3e0cfe3c b2015486 bc59814b 8e30 diff --git a/llvm/test/CodeGen/DirectX/wavesize-md-errs.ll b/llvm/test/CodeGen/DirectX/wavesize-md-errs.ll new file mode 100644 index 0000000000000..9016c5d7e8d44 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/wavesize-md-errs.ll @@ -0,0 +1,31 @@ +; RUN: split-file %s %t +; RUN: not opt -S --dxil-translate-metadata %t/low-sm.ll 2>&1 | FileCheck %t/low-sm.ll +; RUN: not opt -S --dxil-translate-metadata %t/low-sm-for-range.ll 2>&1 | FileCheck %t/low-sm-for-range.ll + +; Test that wavesize metadata is only allowed on applicable shader model versions + +;--- low-sm.ll + +; CHECK: Shader model 6.6 or greater is required to specify the "hlsl.wavesize" function attribute + +target triple = "dxil-unknown-shadermodel6.5-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,0,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- low-sm-for-range.ll + +; CHECK: Shader model 6.8 or greater is required to specify wave size range values of the "hlsl.wavesize" function attribute + +target triple = "dxil-unknown-shadermodel6.7-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,32,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } diff --git a/llvm/test/CodeGen/DirectX/wavesize-md-valid.ll b/llvm/test/CodeGen/DirectX/wavesize-md-valid.ll new file mode 100644 index 0000000000000..3ad6c1d034252 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/wavesize-md-valid.ll @@ -0,0 +1,96 @@ +; RUN: split-file %s %t +; RUN: opt -S --dxil-translate-metadata %t/only.ll | FileCheck %t/only.ll +; RUN: opt -S --dxil-translate-metadata %t/min.ll | FileCheck %t/min.ll +; RUN: opt -S --dxil-translate-metadata %t/max.ll | FileCheck %t/max.ll +; RUN: opt -S --dxil-translate-metadata %t/pref.ll | FileCheck %t/pref.ll + +; RUN: llc --filetype=obj %t/only.ll -o - | obj2yaml | FileCheck %t/only.ll --check-prefix=OBJ +; RUN: llc --filetype=obj %t/min.ll -o - | obj2yaml | FileCheck %t/min.ll --check-prefix=OBJ +; RUN: llc --filetype=obj %t/max.ll -o - | obj2yaml | FileCheck %t/max.ll --check-prefix=OBJ +; RUN: llc --filetype=obj %t/pref.ll -o - | obj2yaml | FileCheck %t/pref.ll --check-prefix=OBJ + +; Test that wave size/range metadata is correctly generated with the correct tag + +;--- only.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 11, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = !{i32 16} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 16 + +target triple = "dxil-unknown-shadermodel6.6-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,0,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- min.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 23, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = !{i32 16, i32 0, i32 0} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 16 + +target triple = "dxil-unknown-shadermodel6.8-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,0,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- max.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 23, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = !{i32 16, i32 32, i32 0} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 32 + +target triple = "dxil-unknown-shadermodel6.8-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,32,0" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +;--- pref.ll + +; CHECK: !dx.entryPoints = !{![[#ENTRY:]]} +; CHECK: ![[#ENTRY]] = !{ptr @main, !"main", null, null, ![[#PROPS:]]} +; CHECK: ![[#PROPS]] = !{{{.*}}i32 23, ![[#WAVE_SIZE:]]{{.*}}} +; CHECK: ![[#WAVE_SIZE]] = !{i32 16, i32 64, i32 32} + +; OBJ: - Name: PSV0 +; OBJ: PSVInfo: +; OBJ: MinimumWaveLaneCount: 16 +; OBJ: MaximumWaveLaneCount: 64 + +target triple = "dxil-unknown-shadermodel6.8-compute" + +define void @main() #0 { +entry: + ret void +} + +attributes #0 = { "hlsl.wavesize"="16,64,32" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } diff --git a/llvm/test/CodeGen/PowerPC/vec_rounding.ll b/llvm/test/CodeGen/PowerPC/vec_rounding.ll index 2f16a435440ff..438c8ebdc099e 100644 --- a/llvm/test/CodeGen/PowerPC/vec_rounding.ll +++ b/llvm/test/CodeGen/PowerPC/vec_rounding.ll @@ -1,172 +1,251 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s ; Check vector round to single-precision toward -infinity (vrfim) ; instruction generation using Altivec. -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - declare <2 x double> @llvm.floor.v2f64(<2 x double> %p) define <2 x double> @floor_v2f64(<2 x double> %p) +; CHECK-LABEL: floor_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frim 1, 1 +; CHECK-NEXT: frim 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: floor_v2f64: -; CHECK: frim -; CHECK: frim declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) define <4 x double> @floor_v4f64(<4 x double> %p) +; CHECK-LABEL: floor_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frim 1, 1 +; CHECK-NEXT: frim 2, 2 +; CHECK-NEXT: frim 3, 3 +; CHECK-NEXT: frim 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: floor_v4f64: -; CHECK: frim -; CHECK: frim -; CHECK: frim -; CHECK: frim declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) define <2 x double> @ceil_v2f64(<2 x double> %p) +; CHECK-LABEL: ceil_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frip 1, 1 +; CHECK-NEXT: frip 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: ceil_v2f64: -; CHECK: frip -; CHECK: frip declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) define <4 x double> @ceil_v4f64(<4 x double> %p) +; CHECK-LABEL: ceil_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frip 1, 1 +; CHECK-NEXT: frip 2, 2 +; CHECK-NEXT: frip 3, 3 +; CHECK-NEXT: frip 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: ceil_v4f64: -; CHECK: frip -; CHECK: frip -; CHECK: frip -; CHECK: frip declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) define <2 x double> @trunc_v2f64(<2 x double> %p) +; CHECK-LABEL: trunc_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: friz 1, 1 +; CHECK-NEXT: friz 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: trunc_v2f64: -; CHECK: friz -; CHECK: friz declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) define <4 x double> @trunc_v4f64(<4 x double> %p) +; CHECK-LABEL: trunc_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: friz 1, 1 +; CHECK-NEXT: friz 2, 2 +; CHECK-NEXT: friz 3, 3 +; CHECK-NEXT: friz 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: trunc_v4f64: -; CHECK: friz -; CHECK: friz -; CHECK: friz -; CHECK: friz declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) -define <2 x double> @nearbyint_v2f64(<2 x double> %p) +define <2 x double> @nearbyint_v2f64(<2 x double> %p) nounwind +; CHECK-LABEL: nearbyint_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -128(1) +; CHECK-NEXT: std 0, 144(1) +; CHECK-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 31, 2 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 30, 1 +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 2, 1 +; CHECK-NEXT: fmr 1, 30 +; CHECK-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 128 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: nearbyint_v2f64: -; CHECK: bl nearbyint -; CHECK: bl nearbyint declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) -define <4 x double> @nearbyint_v4f64(<4 x double> %p) +define <4 x double> @nearbyint_v4f64(<4 x double> %p) nounwind +; CHECK-LABEL: nearbyint_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -144(1) +; CHECK-NEXT: std 0, 160(1) +; CHECK-NEXT: stfd 28, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 29, 2 +; CHECK-NEXT: stfd 30, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 30, 3 +; CHECK-NEXT: stfd 31, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 31, 4 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 28, 1 +; CHECK-NEXT: fmr 1, 29 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 29, 1 +; CHECK-NEXT: fmr 1, 30 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 30, 1 +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 4, 1 +; CHECK-NEXT: fmr 1, 28 +; CHECK-NEXT: lfd 31, 136(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: fmr 2, 29 +; CHECK-NEXT: fmr 3, 30 +; CHECK-NEXT: lfd 30, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 144 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: nearbyint_v4f64: -; CHECK: bl nearbyint -; CHECK: bl nearbyint -; CHECK: bl nearbyint -; CHECK: bl nearbyint declare <4 x float> @llvm.floor.v4f32(<4 x float> %p) define <4 x float> @floor_v4f32(<4 x float> %p) +; CHECK-LABEL: floor_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfim 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: floor_v4f32: -; CHECK: vrfim declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) define <8 x float> @floor_v8f32(<8 x float> %p) +; CHECK-LABEL: floor_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfim 2, 2 +; CHECK-NEXT: vrfim 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: floor_v8f32: -; CHECK: vrfim -; CHECK: vrfim declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p) define <4 x float> @ceil_v4f32(<4 x float> %p) +; CHECK-LABEL: ceil_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfip 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: ceil_v4f32: -; CHECK: vrfip declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p) define <8 x float> @ceil_v8f32(<8 x float> %p) +; CHECK-LABEL: ceil_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfip 2, 2 +; CHECK-NEXT: vrfip 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: ceil_v8f32: -; CHECK: vrfip -; CHECK: vrfip declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p) define <4 x float> @trunc_v4f32(<4 x float> %p) +; CHECK-LABEL: trunc_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfiz 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: trunc_v4f32: -; CHECK: vrfiz declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p) define <8 x float> @trunc_v8f32(<8 x float> %p) +; CHECK-LABEL: trunc_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfiz 2, 2 +; CHECK-NEXT: vrfiz 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: trunc_v8f32: -; CHECK: vrfiz -; CHECK: vrfiz declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) define <4 x float> @nearbyint_v4f32(<4 x float> %p) +; CHECK-LABEL: nearbyint_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfin 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: nearbyint_v4f32: -; CHECK: vrfin declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) define <8 x float> @nearbyint_v8f32(<8 x float> %p) +; CHECK-LABEL: nearbyint_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfin 2, 2 +; CHECK-NEXT: vrfin 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: nearbyint_v8f32: -; CHECK: vrfin -; CHECK: vrfin diff --git a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll index f36baba402421..ab8498d8d3451 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll @@ -14,7 +14,6 @@ entry: } ; CHECK: _ZL10myCallbacki: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define internal void @_ZL10myCallbacki(i32 %value) !type !2 { entry: %sink = alloca i32, align 4 @@ -33,6 +32,6 @@ entry: ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. ; CHECK-NEXT: .byte 1 ;; Function Entry PC -; CHECK-NEXT: .quad [[LABEL_FUNC]] +; CHECK-NEXT: .quad _ZL10myCallbacki ;; Function type ID ; CHECK-NEXT: .quad -5212364466660467813 diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll index cdbad668aec54..02d71073b65c5 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll @@ -11,7 +11,6 @@ declare !type !1 i32 @direct_bar(i8) declare !type !2 ptr @direct_baz(ptr) ; CHECK: ball: -; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: define ptr @ball() { entry: call void @direct_foo() @@ -42,7 +41,7 @@ entry: ;; Flags ; CHECK-NEXT: .byte 7 ;; Function Entry PC -; CHECK-NEXT: .quad [[LABEL_FUNC]] +; CHECK-NEXT: .quad ball ;; Function type ID -- set to 0 as no type metadata attached to function. ; CHECK-NEXT: .quad 0 ;; Number of unique direct callees. diff --git a/llvm/test/CodeGen/Xtensa/s32c1i.ll b/llvm/test/CodeGen/Xtensa/s32c1i.ll new file mode 100644 index 0000000000000..aad738abe6a4c --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/s32c1i.ll @@ -0,0 +1,7 @@ +; RUN: llc -mtriple=xtensa -mattr=+s32c1i -filetype=obj %s -o - | llvm-objdump --arch=xtensa --mattr=s32c1i -d - | FileCheck %s -check-prefix=XTENSA + +define i32 @constraint_i(i32 %a) { +; XTENSA: 0: 22 e2 01 s32c1i a2, a2, 4 + %res = tail call i32 asm "s32c1i $0, $1, $2", "=r,r,i"(i32 %a, i32 4) + ret i32 %res +} diff --git a/llvm/test/Demangle/ms-operators.test b/llvm/test/Demangle/ms-operators.test index b940488786631..cafa1ae3c0663 100644 --- a/llvm/test/Demangle/ms-operators.test +++ b/llvm/test/Demangle/ms-operators.test @@ -143,9 +143,24 @@ ??_7A@B@@6BC@D@@@ ; CHECK: const B::A::`vftable'{for `D::C'} +??_7A@B@@6BC@D@@E@F@@@ +; CHECK: const B::A::`vftable'{for `D::C's `F::E'} + +??_7A@B@@6BC@D@@E@F@@G@H@@@ +; CHECK: const B::A::`vftable'{for `D::C's `F::E's `H::G'} + ??_8Middle2@@7B@ ; CHECK: const Middle2::`vbtable' +??_7A@@6BB@@@ +; CHECK: const A::`vftable'{for `B'} + +??_7A@@6BB@@C@@@ +; CHECK: const A::`vftable'{for `B's `C'} + +??_7A@@6BB@@C@@D@@@ +; CHECK: const A::`vftable'{for `B's `C's `D'} + ??_9Base@@$B7AA ; CHECK: [thunk]: __cdecl Base::`vcall'{8, {flat}} diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s index 6e84e9132a55d..567d41df6d9ef 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=GFX90A --implicit-check-not=error: %s - +// XFAIL: * ds_add_src2_u32 v1 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU @@ -239,3 +239,481 @@ scratch_load_lds_dword v2, off ds_read_b32 v0, v1 gds // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: gds modifier is not supported on this GPU + +// op_sel not allowed in dot opcodes with 4- or 8-bit packed data + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +// nv bit in FLAT instructions +flat_load_ubyte v5, v[2:3] offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_load_ubyte a5, v[2:3] offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_store_dword v[2:3], v5 offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_store_dword v[2:3], a5 offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_load_ubyte v5, v[2:3], off offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_store_byte v[2:3], v5, off offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_add v[2:3], v5, off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap a1, v[2:3], a2, off glc nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap_x2 v[2:3], v[4:5], off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap_x2 v[2:3], a[4:5], off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_load_ubyte v5, off, s2 offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_load_ubyte a5, off, s2 offset:-1 nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_store_dword v2, v3, off nv +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s index c96a72ddc2573..3af0d83fb3056 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -706,107 +706,107 @@ flat_load_short_d16_hi a5, v[2:3] offset:4095 glc flat_load_short_d16_hi a5, v[2:3] offset:4095 slc // GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_swap a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_add a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x09,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_add a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_sub a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x0d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_sub a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x11,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smin a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_umin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x15,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umin a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x19,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_umax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x1d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_and a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x21,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_and a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_or a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x25,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_or a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_xor a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x29,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_xor a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_inc a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x2d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_inc a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_dec a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x31,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_dec a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc // GFX90A: flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x99,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x9d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa5,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa9,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xad,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xb1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx942_err.s b/llvm/test/MC/AMDGPU/gfx942_err.s index fd59a01b34a04..dc51bab65aa04 100644 --- a/llvm/test/MC/AMDGPU/gfx942_err.s +++ b/llvm/test/MC/AMDGPU/gfx942_err.s @@ -125,3 +125,31 @@ global_load_dword v[2:3], off lds scratch_load_dword v2, off lds // GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +// nv bit in FLAT instructions +flat_load_ubyte v5, v[2:3] offset:4095 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_store_dword v[2:3], v5 offset:4095 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +flat_atomic_add_f32 v[2:3], v5 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_load_dword v2, v[2:3], off sc0 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_store_dword v[2:3], v5 off sc0 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_add_f64 v[0:1], v[2:3], off sc1 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +global_atomic_swap v0, v[2:3], v5 off sc0 nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_load_lds_dword v2, off nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU + +scratch_store_dword v2, v3, off nv +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s index 5cc3d2533a149..7687c0a478bd9 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s @@ -24,6 +24,18 @@ flat_load_ubyte v5, v[1:2] offset:4095 glc flat_load_ubyte v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] +flat_load_ubyte v5, v[1:2] nv +// CHECK: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] + flat_load_sbyte v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] @@ -48,6 +60,18 @@ flat_load_sbyte v5, v[1:2] offset:4095 glc flat_load_sbyte v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] +flat_load_sbyte v5, v[1:2] nv +// CHECK: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] + flat_load_ushort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] @@ -72,6 +96,18 @@ flat_load_ushort v5, v[1:2] offset:4095 glc flat_load_ushort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] +flat_load_ushort v5, v[1:2] nv +// CHECK: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ushort v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ushort v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ushort v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] + flat_load_sshort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] @@ -96,6 +132,18 @@ flat_load_sshort v5, v[1:2] offset:4095 glc flat_load_sshort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] +flat_load_sshort v5, v[1:2] nv +// CHECK: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sshort v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sshort v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sshort v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] + flat_load_dword v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] @@ -120,6 +168,18 @@ flat_load_dword v5, v[1:2] offset:4095 glc flat_load_dword v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] +flat_load_dword v5, v[1:2] nv +// CHECK: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dword v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dword v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dword v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] + flat_load_dwordx2 v[5:6], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] @@ -144,6 +204,18 @@ flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] +flat_load_dwordx2 v[5:6], v[1:2] nv +// CHECK: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] + flat_load_dwordx3 v[5:7], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] @@ -168,6 +240,18 @@ flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] +flat_load_dwordx3 v[5:7], v[1:2] nv +// CHECK: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] + flat_load_dwordx4 v[5:8], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] @@ -192,6 +276,18 @@ flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] +flat_load_dwordx4 v[5:8], v[1:2] nv +// CHECK: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] + +flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] + flat_store_byte v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] @@ -216,6 +312,18 @@ flat_store_byte v[1:2], v2 offset:4095 glc flat_store_byte v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] +flat_store_byte v[1:2], v2 nv +// CHECK: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] + flat_store_byte_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] @@ -240,6 +348,18 @@ flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] +flat_store_byte_d16_hi v[1:2], v2 nv +// CHECK: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte_d16_hi v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] + +flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] + flat_store_short v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] @@ -264,6 +384,18 @@ flat_store_short v[1:2], v2 offset:4095 glc flat_store_short v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] +flat_store_short v[1:2], v2 nv +// CHECK: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] + flat_store_short_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] @@ -288,6 +420,18 @@ flat_store_short_d16_hi v[1:2], v2 offset:4095 glc flat_store_short_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] +flat_store_short_d16_hi v[1:2], v2 nv +// CHECK: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short_d16_hi v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] + +flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] + flat_store_dword v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] @@ -312,6 +456,18 @@ flat_store_dword v[1:2], v2 offset:4095 glc flat_store_dword v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] +flat_store_dword v[1:2], v2 nv +// CHECK: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dword v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dword v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dword v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] + flat_store_dwordx2 v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] @@ -336,6 +492,18 @@ flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] +flat_store_dwordx2 v[1:2], v[2:3] nv +// CHECK: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv +// CHECK: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] + flat_store_dwordx3 v[1:2], v[2:4] offset:4095 // CHECK: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] @@ -360,6 +528,18 @@ flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc // CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] +flat_store_dwordx3 v[1:2], v[2:4] nv +// CHECK: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv +// CHECK: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] + flat_store_dwordx4 v[1:2], v[2:5] offset:4095 // CHECK: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] @@ -384,6 +564,18 @@ flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc // CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] +flat_store_dwordx4 v[1:2], v[2:5] nv +// CHECK: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv +// CHECK: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] + +flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] + flat_load_ubyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] @@ -408,6 +600,18 @@ flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] +flat_load_ubyte_d16 v5, v[1:2] nv +// CHECK: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16 v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] + flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] @@ -432,6 +636,18 @@ flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] +flat_load_ubyte_d16_hi v5, v[1:2] nv +// CHECK: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] + +flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] + flat_load_sbyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] @@ -456,6 +672,18 @@ flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] +flat_load_sbyte_d16 v5, v[1:2] nv +// CHECK: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16 v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] + flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] @@ -480,6 +708,18 @@ flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] +flat_load_sbyte_d16_hi v5, v[1:2] nv +// CHECK: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] + +flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] + flat_load_short_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] @@ -504,6 +744,18 @@ flat_load_short_d16 v5, v[1:2] offset:4095 glc flat_load_short_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] +flat_load_short_d16 v5, v[1:2] nv +// CHECK: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16 v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16 v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16 v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] + flat_load_short_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] @@ -528,6 +780,18 @@ flat_load_short_d16_hi v5, v[1:2] offset:4095 glc flat_load_short_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] +flat_load_short_d16_hi v5, v[1:2] nv +// CHECK: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16_hi v5, v[1:2] offset:7 nv +// CHECK: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] + +flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] + flat_atomic_swap v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] @@ -552,6 +816,18 @@ flat_atomic_swap v0, v[1:2], v2 offset:4095 glc flat_atomic_swap v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] +flat_atomic_swap v[1:2], v2 nv +// CHECK: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_swap v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_swap v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] + flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] @@ -576,6 +852,18 @@ flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] +flat_atomic_cmpswap v[1:2], v[2:3] nv +// CHECK: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv +// CHECK: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv +// CHECK: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv +// CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] + flat_atomic_add v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] @@ -600,6 +888,18 @@ flat_atomic_add v0, v[1:2], v2 offset:4095 glc flat_atomic_add v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] +flat_atomic_add v[1:2], v2 nv +// CHECK: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_add v[1:2], v2 offset:7 nv +// CHECK: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv +// CHECK: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] + +flat_atomic_add v[1:2], v2 offset:4095 slc nv +// CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] + flat_atomic_sub v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] @@ -1197,6 +1497,18 @@ global_load_ubyte v5, v1, s[4:5] offset:-1 glc global_load_ubyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x04,0x05] +global_load_ubyte v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] + global_load_sbyte v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x04,0x05] @@ -1242,6 +1554,18 @@ global_load_sbyte v5, v1, s[4:5] offset:-1 glc global_load_sbyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x04,0x05] +global_load_sbyte v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] + global_load_ushort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x04,0x05] @@ -1287,6 +1611,18 @@ global_load_ushort v5, v1, s[4:5] offset:-1 glc global_load_ushort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x04,0x05] +global_load_ushort v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] + +global_load_ushort v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] + +global_load_ushort v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] + +global_load_ushort v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] + global_load_sshort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x04,0x05] @@ -1332,6 +1668,18 @@ global_load_sshort v5, v1, s[4:5] offset:-1 glc global_load_sshort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x04,0x05] +global_load_sshort v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sshort v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sshort v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] + +global_load_sshort v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] + global_load_dword v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x04,0x05] @@ -1377,6 +1725,18 @@ global_load_dword v5, v1, s[4:5] offset:-1 glc global_load_dword v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x04,0x05] +global_load_dword v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] + +global_load_dword v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] + +global_load_dword v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] + +global_load_dword v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] + global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x04,0x05] @@ -1422,6 +1782,18 @@ global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x04,0x05] +global_load_dwordx2 v[5:6], v1, s[4:5] nv +// CHECK: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] + +global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] + +global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] + +global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] + global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x04,0x05] @@ -1467,6 +1839,15 @@ global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x04,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] nv +// CHECK: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] + global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x04,0x05] @@ -1512,6 +1893,15 @@ global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x04,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] nv +// CHECK: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] +global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] + global_store_byte v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x06,0x00] @@ -1557,6 +1947,18 @@ global_store_byte v1, v2, s[6:7] offset:-1 glc global_store_byte v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x06,0x00] +global_store_byte v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] + global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x06,0x00] @@ -1602,6 +2004,18 @@ global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x06,0x00] +global_store_byte_d16_hi v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] + +global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] + global_store_short v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x06,0x00] @@ -1647,6 +2061,18 @@ global_store_short v1, v2, s[6:7] offset:-1 glc global_store_short v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x06,0x00] +global_store_short v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] + +global_store_short v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] + +global_store_short v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] + +global_store_short v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] + global_store_short_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x06,0x00] @@ -1692,6 +2118,18 @@ global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x06,0x00] +global_store_short_d16_hi v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] + +global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] + +global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] + +global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] + global_store_dword v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x06,0x00] @@ -1737,6 +2175,18 @@ global_store_dword v1, v2, s[6:7] offset:-1 glc global_store_dword v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x06,0x00] +global_store_dword v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] + +global_store_dword v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] + +global_store_dword v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] + +global_store_dword v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] + global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x06,0x00] @@ -1782,6 +2232,18 @@ global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x06,0x00] +global_store_dwordx2 v1, v[2:3], s[6:7] nv +// CHECK: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] + global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x06,0x00] @@ -1827,6 +2289,18 @@ global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x06,0x00] +global_store_dwordx3 v1, v[2:4], s[6:7] nv +// CHECK: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] + global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x06,0x00] @@ -1872,6 +2346,18 @@ global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x06,0x00] +global_store_dwordx4 v1, v[2:5], s[6:7] nv +// CHECK: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] + +global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] + global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x04,0x05] @@ -1917,6 +2403,18 @@ global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x04,0x05] +global_load_ubyte_d16 v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] + global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x04,0x05] @@ -1962,6 +2460,18 @@ global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x04,0x05] +global_load_ubyte_d16_hi v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] + +global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] + global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x04,0x05] @@ -2007,6 +2517,18 @@ global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x04,0x05] +global_load_sbyte_d16 v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] + global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x04,0x05] @@ -2052,6 +2574,18 @@ global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x04,0x05] +global_load_sbyte_d16_hi v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] + +global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] + global_load_short_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x04,0x05] @@ -2097,6 +2631,18 @@ global_load_short_d16 v5, v1, s[4:5] offset:-1 glc global_load_short_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x04,0x05] +global_load_short_d16 v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16 v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] + global_load_short_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x04,0x05] @@ -2142,6 +2688,18 @@ global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x04,0x05] +global_load_short_d16_hi v5, v1, s[4:5] nv +// CHECK: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv +// CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] + +global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] + global_atomic_swap v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x06,0x00] @@ -2187,6 +2745,18 @@ global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc global_atomic_swap v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x06,0x00] +global_atomic_swap v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_swap v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] + global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x06,0x00] @@ -2232,6 +2802,18 @@ global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x06,0x00] +global_atomic_cmpswap v1, v[2:3], s[6:7] nv +// CHECK: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] + global_atomic_add v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x06,0x00] @@ -2277,6 +2859,18 @@ global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc global_atomic_add v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x06,0x00] +global_atomic_add v1, v2, s[6:7] nv +// CHECK: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_add v1, v2, s[6:7] offset:-1 nv +// CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv +// CHECK: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] + +global_atomic_add v1, v2, s[6:7] offset:-1 slc nv +// CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] + global_atomic_sub v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x06,0x00] @@ -3357,6 +3951,18 @@ scratch_load_ubyte v5, off, s2 offset:-1 glc scratch_load_ubyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ubyte v5, off, s2 nv +// CHECK: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sbyte v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] @@ -3402,6 +4008,18 @@ scratch_load_sbyte v5, off, s2 offset:-1 glc scratch_load_sbyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sbyte v5, off, s2 nv +// CHECK: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] + scratch_load_ushort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] @@ -3447,6 +4065,18 @@ scratch_load_ushort v5, off, s2 offset:-1 glc scratch_load_ushort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ushort v5, off, s2 nv +// CHECK: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ushort v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ushort v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ushort v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sshort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] @@ -3492,6 +4122,18 @@ scratch_load_sshort v5, off, s2 offset:-1 glc scratch_load_sshort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sshort v5, off, s2 nv +// CHECK: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sshort v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sshort v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sshort v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dword v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] @@ -3537,6 +4179,18 @@ scratch_load_dword v5, off, s2 offset:-1 glc scratch_load_dword v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dword v5, off, s2 nv +// CHECK: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dword v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dword v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dword v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dwordx2 v[5:6], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] @@ -3582,6 +4236,18 @@ scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dwordx2 v[5:6], off, s2 nv +// CHECK: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dwordx3 v[5:7], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] @@ -3627,6 +4293,18 @@ scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dwordx3 v[5:7], off, s2 nv +// CHECK: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] + scratch_load_dwordx4 v[5:8], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] @@ -3672,6 +4350,18 @@ scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] +scratch_load_dwordx4 v[5:8], off, s2 nv +// CHECK: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] + scratch_store_byte off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] @@ -3717,6 +4407,18 @@ scratch_store_byte off, v2, s3 offset:-1 glc scratch_store_byte off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] +scratch_store_byte off, v2, s3 nv +// CHECK: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] + scratch_store_byte_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] @@ -3762,6 +4464,18 @@ scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] +scratch_store_byte_d16_hi off, v2, s3 nv +// CHECK: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] + scratch_store_short off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] @@ -3807,6 +4521,18 @@ scratch_store_short off, v2, s3 offset:-1 glc scratch_store_short off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] +scratch_store_short off, v2, s3 nv +// CHECK: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] + scratch_store_short_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] @@ -3852,6 +4578,18 @@ scratch_store_short_d16_hi off, v2, s3 offset:-1 glc scratch_store_short_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] +scratch_store_short_d16_hi off, v2, s3 nv +// CHECK: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short_d16_hi off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dword off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] @@ -3897,6 +4635,18 @@ scratch_store_dword off, v2, s3 offset:-1 glc scratch_store_dword off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dword off, v2, s3 nv +// CHECK: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dword off, v2, s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dword off, v2, s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dword off, v2, s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dwordx2 off, v[2:3], s3 offset:-1 // CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] @@ -3942,6 +4692,18 @@ scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dwordx2 off, v[2:3], s3 nv +// CHECK: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dwordx3 off, v[2:4], s3 offset:-1 // CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] @@ -3987,6 +4749,18 @@ scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dwordx3 off, v[2:4], s3 nv +// CHECK: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] + scratch_store_dwordx4 off, v[2:5], s3 offset:-1 // CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] @@ -4032,6 +4806,18 @@ scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] +scratch_store_dwordx4 off, v[2:5], s3 nv +// CHECK: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv +// CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] + +scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] + scratch_load_ubyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] @@ -4077,6 +4863,18 @@ scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ubyte_d16 v5, off, s2 nv +// CHECK: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] + scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] @@ -4122,6 +4920,18 @@ scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] +scratch_load_ubyte_d16_hi v5, off, s2 nv +// CHECK: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sbyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] @@ -4167,6 +4977,18 @@ scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sbyte_d16 v5, off, s2 nv +// CHECK: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] + scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] @@ -4212,6 +5034,18 @@ scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] +scratch_load_sbyte_d16_hi v5, off, s2 nv +// CHECK: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] + scratch_load_short_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] @@ -4254,6 +5088,18 @@ scratch_load_short_d16 v5, off, s2 offset:-4096 scratch_load_short_d16 v5, off, s2 offset:-1 glc // CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x02,0x05] +scratch_load_short_d16 v5, off, s2 nv +// CHECK: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16 v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16 v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16 v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] + scratch_load_short_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] @@ -4302,6 +5148,18 @@ scratch_load_short_d16_hi v5, off, s2 offset:-1 glc scratch_load_short_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] +scratch_load_short_d16_hi v5, off, s2 nv +// CHECK: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16_hi v5, off, s2 offset:-1 nv +// CHECK: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv +// CHECK: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] + +scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv +// CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] + global_load_dword v[2:3], off lds // CHECK: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt index 0ee659e207c91..4c06585a4c2eb 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt @@ -21,6 +21,18 @@ # CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ubyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05 @@ -42,6 +54,18 @@ # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sbyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_ushort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05 @@ -63,6 +87,18 @@ # CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ushort v5, v[1:2] nv ; encoding: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ushort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ushort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sshort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05 @@ -84,6 +120,18 @@ # CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sshort v5, v[1:2] nv ; encoding: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sshort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sshort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dword v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05 @@ -105,6 +153,18 @@ # CHECK: flat_load_dword v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dword v5, v[1:2] nv ; encoding: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dword v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dword v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dword v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05 @@ -126,6 +186,18 @@ # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] nv ; encoding: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05 @@ -147,6 +219,18 @@ # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] nv ; encoding: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05 @@ -168,6 +252,18 @@ # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] nv ; encoding: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_store_byte v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00 @@ -189,6 +285,18 @@ # CHECK: flat_store_byte v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_byte v[1:2], v2 nv ; encoding: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00 @@ -210,6 +318,18 @@ # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_byte_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_short v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00 @@ -231,6 +351,18 @@ # CHECK: flat_store_short v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_short v[1:2], v2 nv ; encoding: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00 @@ -252,6 +384,18 @@ # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_short_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dword v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00 @@ -273,6 +417,18 @@ # CHECK: flat_store_dword v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dword v[1:2], v2 nv ; encoding: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dword v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dword v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dword v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00 @@ -294,6 +450,18 @@ # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dwordx2 v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00 @@ -315,6 +483,18 @@ # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] nv ; encoding: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv ; encoding: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv ; encoding: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00 @@ -336,6 +516,18 @@ # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00 +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] nv ; encoding: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] +0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv ; encoding: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] +0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv ; encoding: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00 + +# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] +0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00 + # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05 @@ -357,6 +549,18 @@ # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ubyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05 @@ -378,6 +582,18 @@ # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05 @@ -399,6 +615,18 @@ # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sbyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05 @@ -420,6 +648,18 @@ # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05 @@ -441,6 +681,18 @@ # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_short_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05 @@ -462,6 +714,18 @@ # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05 +# CHECK: flat_load_short_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] +0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] +0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05 + +# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] +0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05 + # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00 @@ -483,6 +747,18 @@ # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00 +# CHECK: flat_atomic_swap v[1:2], v2 nv ; encoding: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] +0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_swap v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] +0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00 + # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00 @@ -504,6 +780,18 @@ # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00 +# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] +0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] +0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00 + # CHECK: flat_atomic_add v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00 @@ -525,6 +813,18 @@ # CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00 +# CHECK: flat_atomic_add v[1:2], v2 nv ; encoding: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] +0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_add v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] +0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00 + +# CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] +0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00 + # CHECK: flat_atomic_sub v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00 @@ -1017,6 +1317,18 @@ # CHECK: global_load_ubyte v5, v[1:2], off ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ubyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sbyte v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05 @@ -1026,6 +1338,18 @@ # CHECK: global_load_sbyte v5, v[1:2], off ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sbyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_ushort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05 @@ -1035,6 +1359,18 @@ # CHECK: global_load_ushort v5, v[1:2], off ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ushort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sshort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05 @@ -1044,6 +1380,18 @@ # CHECK: global_load_sshort v5, v[1:2], off ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sshort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dword v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05 @@ -1053,6 +1401,18 @@ # CHECK: global_load_dword v5, v[1:2], off ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dword v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dwordx2 v[5:6], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05 @@ -1062,6 +1422,18 @@ # CHECK: global_load_dwordx2 v[5:6], v[1:2], off ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] nv ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dwordx3 v[5:7], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05 @@ -1071,6 +1443,18 @@ # CHECK: global_load_dwordx3 v[5:7], v[1:2], off ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] nv ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_dwordx4 v[5:8], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05 @@ -1080,6 +1464,18 @@ # CHECK: global_load_dwordx4 v[5:8], v[1:2], off ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] nv ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_store_byte v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00 @@ -1089,6 +1485,18 @@ # CHECK: global_store_byte v[1:2], v2, off ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_byte v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_byte_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00 @@ -1098,6 +1506,18 @@ # CHECK: global_store_byte_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_short v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00 @@ -1107,6 +1527,18 @@ # CHECK: global_store_short v[1:2], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_short v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_short_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00 @@ -1116,6 +1548,18 @@ # CHECK: global_store_short_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dword v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00 @@ -1125,6 +1569,18 @@ # CHECK: global_store_dword v[1:2], v2, off ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dword v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dwordx2 v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00 @@ -1134,6 +1590,18 @@ # CHECK: global_store_dwordx2 v[1:2], v[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dwordx3 v[1:2], v[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00 @@ -1143,6 +1611,18 @@ # CHECK: global_store_dwordx3 v[1:2], v[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] nv ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_store_dwordx4 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00 @@ -1152,6 +1632,18 @@ # CHECK: global_store_dwordx4 v[1:2], v[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00 +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] nv ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] +0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00 + +# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] +0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00 + # CHECK: global_load_ubyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05 @@ -1161,6 +1653,18 @@ # CHECK: global_load_ubyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05 @@ -1170,6 +1674,18 @@ # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sbyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05 @@ -1179,6 +1695,18 @@ # CHECK: global_load_sbyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05 @@ -1188,6 +1716,18 @@ # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_short_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05 @@ -1197,6 +1737,18 @@ # CHECK: global_load_short_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_short_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_load_short_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05 @@ -1206,6 +1758,18 @@ # CHECK: global_load_short_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05 +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] +0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05 + +# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] +0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05 + # CHECK: global_atomic_swap v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00 @@ -1215,6 +1779,18 @@ # CHECK: global_atomic_swap v[1:2], v2, off ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00 +# CHECK: global_atomic_swap v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] +0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00 + # CHECK: global_atomic_cmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00 @@ -1236,6 +1812,18 @@ # CHECK: global_atomic_cmpswap v1, v[2:3], v[4:5], off glc ; encoding: [0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01] 0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01 +# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] +0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00 + # CHECK: global_atomic_add v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00 @@ -1245,6 +1833,18 @@ # CHECK: global_atomic_add v[1:2], v2, off ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00 +# CHECK: global_atomic_add v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] +0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00 + +# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] +0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00 + # CHECK: global_atomic_sub v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00 @@ -1503,6 +2103,18 @@ # CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ubyte v5, off, s2 nv ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05 @@ -1542,6 +2154,18 @@ # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sbyte v5, off, s2 nv ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_ushort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05 @@ -1581,6 +2205,18 @@ # CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ushort v5, off, s2 nv ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ushort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ushort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sshort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05 @@ -1620,6 +2256,18 @@ # CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sshort v5, off, s2 nv ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sshort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sshort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dword v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05 @@ -1659,6 +2307,18 @@ # CHECK: scratch_load_dword v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dword v5, off, s2 nv ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dword v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dword v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dword v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05 @@ -1698,6 +2358,18 @@ # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 nv ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05 @@ -1737,6 +2409,18 @@ # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 nv ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05 @@ -1776,6 +2460,18 @@ # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 nv ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_store_byte off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00 @@ -1815,6 +2511,18 @@ # CHECK: scratch_store_byte off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_byte off, v2, s3 nv ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00 @@ -1854,6 +2562,18 @@ # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_byte_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_short off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00 @@ -1893,6 +2613,18 @@ # CHECK: scratch_store_short off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_short off, v2, s3 nv ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00 @@ -1932,6 +2664,18 @@ # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_short_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dword off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00 @@ -1971,6 +2715,18 @@ # CHECK: scratch_store_dword off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dword off, v2, s3 nv ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dword off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dword off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dword off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00 @@ -2010,6 +2766,18 @@ # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 nv ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00 @@ -2049,6 +2817,18 @@ # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 nv ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00 @@ -2088,6 +2868,18 @@ # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00 +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 nv ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] +0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00 + +# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] +0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00 + # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05 @@ -2127,6 +2919,18 @@ # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ubyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05 @@ -2166,6 +2970,18 @@ # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05 @@ -2205,6 +3021,18 @@ # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sbyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05 @@ -2244,6 +3072,18 @@ # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05 @@ -2283,6 +3123,18 @@ # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_short_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05 + # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05 @@ -2322,6 +3174,18 @@ # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05 +# CHECK: scratch_load_short_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] +0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05 + +# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] +0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05 + # CHECK: global_load_dword v[2:3], off lds ; encoding: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] 0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt index 2661ed5b04cc9..b27a50d93f5b9 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -250,6 +250,9 @@ #CHECK: vucmprhh 1, 3, 6 0x10,0x23,0x31,0x03 +#CHECK: xvrlw 34, 15, 16 +0xf0,0x4f,0x85,0xc1 + #CHECK: xxaes192encp 8, 10, 14 0xf1,0x0b,0x76,0x10 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt index 7fb8254ced0ac..72662d9736740 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -244,6 +244,9 @@ #CHECK: vucmprhh 1, 3, 6 0x03,0x31,0x23,0x10 +#CHECK: xvrlw 34, 15, 16 +0xc1,0x85,0x4f,0xf0 + #CHECK: xxaes192encp 8, 10, 14 0x10,0x76,0x0b,0xf1 diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s index 40059c440b128..ab72649fc3404 100644 --- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -355,6 +355,10 @@ #CHECK-BE: vucmprhh 1, 3, 6 # encoding: [0x10,0x23,0x31,0x03] #CHECK-LE: vucmprhh 1, 3, 6 # encoding: [0x03,0x31,0x23,0x10] + xvrlw 34, 15, 16 +#CHECK-BE: xvrlw 34, 15, 16 # encoding: [0xf0,0x4f,0x85,0xc1] +#CHECK-LE: xvrlw 34, 15, 16 # encoding: [0xc1,0x85,0x4f,0xf0] + xxaes192encp 8, 10, 14 #CHECK-BE: xxaes192encp 8, 10, 14 # encoding: [0xf1,0x0b,0x76,0x10] #CHECK-LE: xxaes192encp 8, 10, 14 # encoding: [0x10,0x76,0x0b,0xf1] diff --git a/llvm/test/MC/Xtensa/s32c1i.s b/llvm/test/MC/Xtensa/s32c1i.s new file mode 100644 index 0000000000000..218a86dd56752 --- /dev/null +++ b/llvm/test/MC/Xtensa/s32c1i.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc %s -triple=xtensa -show-encoding --mattr=+s32c1i \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s + +.align 4 +LBL0: + +# CHECK-INST: xsr a3, atomctl +# CHECK: # encoding: [0x30,0x63,0x61] +xsr a3, atomctl + +# CHECK-INST: xsr a3, scompare1 +# CHECK: # encoding: [0x30,0x0c,0x61] +xsr a3, scompare1 diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td index 2904474f6110b..e4a7126d79fbd 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td @@ -53,21 +53,21 @@ def MSP430LibraryWithCondCC : SystemRuntimeLibrary; // CHECK-NEXT: AvailableLibcallImpls = SystemAvailableImpls; // CHECK-EMPTY: -// CHECK-NEXT: static const LibcallImplPair LibraryCalls[] = { -// CHECK-NEXT: {RTLIB::SOME_FUNC, RTLIB::impl_func_b}, // func_b +// CHECK-NEXT: static const RTLIB::LibcallImpl LibraryCalls[] = { +// CHECK-NEXT: RTLIB::impl_func_b, // func_b // CHECK-NEXT: }; // CHECK-EMPTY: -// CHECK-NEXT: for (const auto [Func, Impl] : LibraryCalls) { -// CHECK-NEXT: setLibcallImpl(Func, Impl); +// CHECK-NEXT: for (const RTLIB::LibcallImpl Impl : LibraryCalls) { +// CHECK-NEXT: setAvailable(Impl); // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: return; @@ -53,13 +53,13 @@ def TheSystemLibraryA : SystemRuntimeLibrary]> { // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: -// IMPL-NEXT: namespace llvm { -// IMPL-NEXT: namespace tdl { +// IMPL-NEXT: namespace llvm::tdl { // IMPL-EMPTY: // IMPL-NEXT: // Sets for dira // IMPL-EMPTY: @@ -204,8 +203,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL-EMPTY: // IMPL-NEXT: static requiredClauses_TDLD_dira { // IMPL-NEXT: }; -// IMPL-NEXT: } // namespace tdl -// IMPL-NEXT: } // namespace llvm +// IMPL-EMPTY: +// IMPL-NEXT: } // namespace llvm::tdl // IMPL-EMPTY: // IMPL-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index 96022d7647440..eaaf82ddaaf41 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -159,8 +159,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: -// IMPL-NEXT: namespace llvm { -// IMPL-NEXT: namespace tdl { +// IMPL-NEXT: namespace llvm::tdl { // IMPL-EMPTY: // IMPL-NEXT: // Sets for dira // IMPL-EMPTY: @@ -177,8 +176,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL-EMPTY: // IMPL-NEXT: static requiredClauses_TDLD_dira { // IMPL-NEXT: }; -// IMPL-NEXT: } // namespace tdl -// IMPL-NEXT: } // namespace llvm +// IMPL-EMPTY: +// IMPL-NEXT: } // namespace llvm::tdl // IMPL-EMPTY: // IMPL-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll index 8ffacb9bdd5f6..1b728f56ab2ea 100644 --- a/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll @@ -1,7 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -mtriple=aarch64-linux-gnu -passes=atomic-expand %s | FileCheck %s -define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) { +define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) !prof !0 { ; CHECK-LABEL: @test_atomicrmw_fadd_f32( ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] @@ -14,7 +14,7 @@ define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) { ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 ; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 ; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float -; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]], !prof [[PROF1:![0-9]+]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: ret float [[TMP5]] ; @@ -336,3 +336,11 @@ define <2 x half> @atomicrmw_fminimum_2_x_half(ptr %ptr, <2 x half> %val) { %res = atomicrmw fminimum ptr %ptr, <2 x half> %val seq_cst ret <2 x half> %res } + +!0 = !{!"function_entry_count", i64 1000} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"unknown", !"atomic-expand"} +;. diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll index 97ea2c6708dad..2828882afe779 100644 --- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll +++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll @@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0" @E = common global ptr null, align 8 ; CHECK-LABEL: @f( -define void @f() { +define void @f() !prof !{!"function_entry_count", i32 10} { entry: %a = load ptr, ptr @A, align 8 %b = load ptr, ptr @B, align 8 @@ -55,7 +55,7 @@ entry: ; CHECK: = icmp ; CHECK-NOT: = icmp -; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label %for.body.ph.ldist1 +; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label %for.body.ph.ldist1, !prof ![[PROF1:[0-9]]] ; The non-distributed loop that the memchecks fall back on. @@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent } !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.distribute.enable", i1 true} +; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll new file mode 100644 index 0000000000000..bd5f4e2a3279b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses=true -max-interleave-group-factor=16 -S < %s | FileCheck %s + +define dso_local void @_Z6unpackPhS_(ptr noalias noundef readonly captures(none) %in, ptr noalias noundef writeonly captures(none) %out) { +; CHECK-LABEL: define dso_local void @_Z6unpackPhS_( +; CHECK-SAME: ptr noalias noundef readonly captures(none) [[IN:%.*]], ptr noalias noundef writeonly captures(none) [[OUT:%.*]]) { +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[IN]], i64 [[OFFSET_IDX2]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP3]], align 1, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC5]] +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i8> [[STRIDED_VEC4]], [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> zeroinitializer, <4 x i8> [[STRIDED_VEC6]], <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC5]], <4 x i8> [[TMP0]], <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC4]], <4 x i8> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC]], <4 x i8> [[TMP4]], <8 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> [[TMP8]], <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP10]], <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> [[TMP14]], <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP15]], <8 x i8> [[TMP16]], <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP17]], <8 x i8> [[TMP18]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i8> [[TMP19]], <16 x i8> [[TMP20]], <32 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP21]], <16 x i8> [[TMP22]], <32 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <32 x i8> [[TMP23]], <32 x i8> [[TMP24]], <64 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <64 x i8> [[TMP25]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: store <64 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %vector.body, !llvm.loop [[LOOP5:![0-9]+]] +; +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %entry, %for.body + %i.033 = phi i32 [ 0, %entry ], [ %inc17, %for.body ] + %out.addr.032 = phi ptr [ %out, %entry ], [ %add.ptr, %for.body ] + %in.addr.031 = phi ptr [ %in, %entry ], [ %add.ptr15, %for.body ] + store i8 0, ptr %out.addr.032, align 1 + %arrayidx10 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 3 + %0 = load i8, ptr %arrayidx10, align 1 + %arrayidx14 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 1 + store i8 %0, ptr %arrayidx14, align 1 + %arrayidx10.1 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 2 + %1 = load i8, ptr %arrayidx10.1, align 1 + %arrayidx14.1 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 2 + store i8 %1, ptr %arrayidx14.1, align 1 + %add.2 = add i8 %0, %1 + %arrayidx14.2 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 3 + store i8 %add.2, ptr %arrayidx14.2, align 1 + %arrayidx10.3 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 1 + %2 = load i8, ptr %arrayidx10.3, align 1 + %arrayidx14.3 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 4 + store i8 %2, ptr %arrayidx14.3, align 1 + %add.4 = add i8 %0, %2 + %arrayidx14.4 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 5 + store i8 %add.4, ptr %arrayidx14.4, align 1 + %add.5 = add i8 %1, %2 + %arrayidx14.5 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 6 + store i8 %add.5, ptr %arrayidx14.5, align 1 + %add.6 = add i8 %0, %add.5 + %arrayidx14.6 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 7 + store i8 %add.6, ptr %arrayidx14.6, align 1 + %3 = load i8, ptr %in.addr.031, align 1 + %arrayidx14.7 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 8 + store i8 %3, ptr %arrayidx14.7, align 1 + %add.8 = add i8 %0, %3 + %arrayidx14.8 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 9 + store i8 %add.8, ptr %arrayidx14.8, align 1 + %add.9 = add i8 %1, %3 + %arrayidx14.9 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 10 + store i8 %add.9, ptr %arrayidx14.9, align 1 + %add.10 = add i8 %0, %add.9 + %arrayidx14.10 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 11 + store i8 %add.10, ptr %arrayidx14.10, align 1 + %add.11 = add i8 %2, %3 + %arrayidx14.11 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 12 + store i8 %add.11, ptr %arrayidx14.11, align 1 + %add.12 = add i8 %0, %add.11 + %arrayidx14.12 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 13 + store i8 %add.12, ptr %arrayidx14.12, align 1 + %add.13 = add i8 %1, %add.11 + %arrayidx14.13 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 14 + store i8 %add.13, ptr %arrayidx14.13, align 1 + %add.14 = add i8 %0, %add.13 + %arrayidx14.14 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 15 + store i8 %add.14, ptr %arrayidx14.14, align 1 + %add.ptr = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 16 + %add.ptr15 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 4 + %inc17 = add nuw nsw i32 %i.033, 1 + %exitcond.not = icmp eq i32 %inc17, 32 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 68cfc659e1e94..cdddcc9fc4226 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 -; RUN: opt -p loop-vectorize -S %s | FileCheck %s +; RUN: opt -p loop-vectorize -max-interleave-group-factor=4 -S %s | FileCheck %s target triple = "arm64-apple-macosx15.0.0" diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-and-casts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-and-casts.ll new file mode 100644 index 0000000000000..bba7d058d6637 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-and-casts.ll @@ -0,0 +1,694 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF4 %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx" + +define void @test_2xi64_matching_zext_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_matching_zext_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_matching_zext_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP2]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = zext i32 %l.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = zext i32 %l.0 to i64 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_matching_sext_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_matching_sext_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_matching_sext_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP2]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = sext i32 %l.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = sext i32 %l.0 to i64 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mismatching_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_mismatching_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP4:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP4]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_mismatching_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = zext i32 %l.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = sext i32 %l.0 to i64 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_matching_cast_add_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_matching_cast_add_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_matching_cast_add_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP3]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = zext i32 %l.0 to i64 + %add.0 = add i64 %ext.0, 2 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %add.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = zext i32 %l.0 to i64 + %add.1 = add i64 %ext.1, 2 + store i64 %add.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mismatching_cast_add_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_mismatching_cast_add_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_mismatching_cast_add_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP5]], splat (i64 2) +; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP6]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %ext.0 = sext i32 %l.0 to i64 + %add.0 = add i64 %ext.0, 2 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %add.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + %ext.1 = zext i32 %l.0 to i64 + %add.1 = add i64 %ext.1, 2 + store i64 %add.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_add_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_add_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_add_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP3]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = add i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %add.1 = add i32 %l.0, 2 + %ext.1 = zext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mismatching_add_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_mismatching_add_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = sub <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[TMP5]] to <2 x i64> +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_mismatching_add_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64> +; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP6]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = add i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %add.1 = sub i32 %l.0, 2 + %ext.1 = zext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_add_mismatching_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_add_mismatching_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP5]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_add_mismatching_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP5]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = add i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %add.1 = add i32 %l.0, 2 + %ext.1 = sext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_sub_mismatching_ops_cast_interleave_group(ptr noalias %dst, ptr %src) { +; VF2-LABEL: define void @test_2xi64_sub_mismatching_ops_cast_interleave_group( +; VF2-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF2-NEXT: [[TMP5:%.*]] = sub <2 x i32> splat (i32 2), [[WIDE_LOAD]] +; VF2-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[TMP5]] to <2 x i64> +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +; VF4-LABEL: define void @test_2xi64_sub_mismatching_ops_cast_interleave_group( +; VF4-SAME: ptr noalias [[DST:%.*]], ptr [[SRC:%.*]]) { +; VF4-NEXT: [[ENTRY:.*:]] +; VF4-NEXT: br label %[[VECTOR_PH:.*]] +; VF4: [[VECTOR_PH]]: +; VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4: [[VECTOR_BODY]]: +; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8 +; VF4-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[WIDE_LOAD]], splat (i32 2) +; VF4-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] +; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i32> splat (i32 2), [[WIDE_LOAD]] +; VF4-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64> +; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP6]], <8 x i32> +; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> +; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 +; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] +; VF4: [[EXIT]]: +; VF4-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx.0 = shl nsw i64 %iv, 1 + %gep.src.0 = getelementptr inbounds i32, ptr %src, i64 %iv + %l.0 = load i32 , ptr %gep.src.0, align 8 + %add.0 = sub i32 %l.0, 2 + %ext.0 = zext i32 %add.0 to i64 + %dst.0 = getelementptr inbounds i64, ptr %dst, i64 %idx.0 + store i64 %ext.0, ptr %dst.0, align 8 + %idx.1 = add i64 %idx.0, 1 + %add.1 = sub i32 2, %l.0 + %ext.1 = zext i32 %add.1 to i64 + %dst.1 = getelementptr inbounds i64, ptr %dst, i64 %idx.1 + store i64 %ext.1, ptr %dst.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll index f2ae327778f4a..54b7f2afe1ed0 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll @@ -925,20 +925,20 @@ define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef ; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]] -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC19]], [[TMP4]] -; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <32 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <32 x float> [[WIDE_VEC]], [[TMP1]] +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <32 x float> [[WIDE_VEC19]], [[TMP4]] +; CHECK-NEXT: store <32 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144 ; CHECK-NEXT: br i1 [[TMP25]], label %[[FOR_END11:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[FOR_END11]]: diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll index d818335f075e5..e48c2b46a138a 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll @@ -141,5 +141,5 @@ return: ;. ; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 58, i32 5} -; CHECK: [[PROF2]] = !{!"branch_weights", i32 56, i32 5} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 53, i32 5} ;. diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll index c005316f07f06..4c8c829a59f3c 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll @@ -10,10 +10,15 @@ define float @sinf(float %x) { ret float %x } +; CHECK: declare void @_Unwind_Resume(...) + +; CHECK: declare void @__umodti3(...) + ; CHECK: declare void @acosf(...) -; CHECK: declare nofpclass(ninf nsub nnorm) float @sqrtf(float) [[SQRT_ATTRS:#[0-9]+]] ; CHECK: declare nofpclass(ninf nsub nnorm) double @sqrt(double) [[SQRT_ATTRS:#[0-9]+]] -; CHECK: declare void @__umodti3(...) +; CHECK: declare nofpclass(ninf nsub nnorm) float @sqrtf(float) [[SQRT_ATTRS:#[0-9]+]] + +; CHECK: declare void @truncl(...) diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll index f0f09e97d9dba..57cb016bcb7f3 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll @@ -7,14 +7,14 @@ ; RUN: %if arm-registered-target %{ opt -S -passes=declare-runtime-libcalls -mtriple=armv7-apple-ios6 < %s | FileCheck -check-prefix=NONE %s %} ; RUN: %if x86-registered-target %{ opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-apple-macos10.8 < %s | FileCheck -check-prefix=NONE %s %} -; X64: declare <2 x float> @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] ; X64: declare { double, double } @__sincos_stret(double) [[SINCOS_ATTRS:#[0-9]+]] +; X64: declare <2 x float> @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] -; STRUCT: declare { float, float } @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] ; STRUCT: declare { double, double } @__sincos_stret(double) [[SINCOS_ATTRS:#[0-9]+]] +; STRUCT: declare { float, float } @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] -; SRET: declare void @__sincosf_stret(ptr sret({ float, float }) align 4, float) [[SINCOS_ATTRS:#[0-9]+]] ; SRET: declare void @__sincos_stret(ptr sret({ double, double }) align 4, double) [[SINCOS_ATTRS:#[0-9]+]] +; SRET: declare void @__sincosf_stret(ptr sret({ float, float }) align 4, float) [[SINCOS_ATTRS:#[0-9]+]] ; CHECK: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(errnomem: write) } ; SRET: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write, errnomem: write) } diff --git a/llvm/utils/TableGen/Basic/CMakeLists.txt b/llvm/utils/TableGen/Basic/CMakeLists.txt index b4a66ecce6440..2030e9add7f30 100644 --- a/llvm/utils/TableGen/Basic/CMakeLists.txt +++ b/llvm/utils/TableGen/Basic/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMTableGenBasic OBJECT EXCLUDE_FROM_ALL DISABLE_LLVM_LINK_LLV IntrinsicEmitter.cpp RISCVTargetDefEmitter.cpp RuntimeLibcallsEmitter.cpp + RuntimeLibcalls.cpp SDNodeProperties.cpp TableGen.cpp TargetFeaturesEmitter.cpp diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index d33bf45595e2e..0bb743dc8a7f5 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -359,7 +359,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { OS << " static constexpr bool is_iterable = true;\n"; OS << "};\n"; } - LlvmNS.close(); } // Given a list of spellings (for a given clause/directive), order them @@ -931,27 +930,20 @@ static void generateClauseSet(ArrayRef VerClauses, // Generate an enum set for the 4 kinds of clauses linked to a directive. static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, Frontend FE, raw_ostream &OS) { + IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() + + "_DIRECTIVE_CLAUSE_SETS"); - std::string IfDefName{"GEN_"}; - IfDefName += getFESpelling(FE).upper(); - IfDefName += "_DIRECTIVE_CLAUSE_SETS"; - IfDefEmitter Scope(OS, IfDefName); - - StringRef Namespace = - getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE); + std::string Namespace = + getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE).str(); // The namespace has to be different for clang vs flang, as 2 structs with the // same name but different layout is UB. So just put the 'clang' on in the // clang namespace. - OS << "namespace " << Namespace << " {\n"; - - // Open namespaces defined in the directive language. - SmallVector Namespaces; - SplitString(DirLang.getCppNamespace(), Namespaces, "::"); - for (auto Ns : Namespaces) - OS << "namespace " << Ns << " {\n"; + // Additionally, open namespaces defined in the directive language. + if (!DirLang.getCppNamespace().empty()) + Namespace += "::" + DirLang.getCppNamespace().str(); + NamespaceEmitter NS(OS, Namespace); for (const Directive Dir : DirLang.getDirectives()) { - OS << "\n"; OS << "// Sets for " << Dir.getSpellingForIdentifier() << "\n"; generateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir, @@ -963,12 +955,6 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, generateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir, DirLang, FE); } - - // Closing namespaces - for (auto Ns : reverse(Namespaces)) - OS << "} // namespace " << Ns << "\n"; - - OS << "} // namespace " << Namespace << "\n"; } // Generate a map of directive (key) with DirectiveClauses struct as values. @@ -976,10 +962,8 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, // allowances (allowed, allowed once, allowed exclusive and required). static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang, Frontend FE, raw_ostream &OS) { - std::string IfDefName{"GEN_"}; - IfDefName += getFESpelling(FE).upper(); - IfDefName += "_DIRECTIVE_CLAUSE_MAP"; - IfDefEmitter Scope(OS, IfDefName); + IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() + + "_DIRECTIVE_CLAUSE_MAP"); OS << "{\n"; diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp new file mode 100644 index 0000000000000..1e609a2a8880b --- /dev/null +++ b/llvm/utils/TableGen/Basic/RuntimeLibcalls.cpp @@ -0,0 +1,93 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RuntimeLibcalls.h" +#include "llvm/TableGen/Error.h" + +using namespace llvm; + +RuntimeLibcalls::RuntimeLibcalls(const RecordKeeper &Records) { + ArrayRef AllRuntimeLibcalls = + Records.getAllDerivedDefinitions("RuntimeLibcall"); + + RuntimeLibcallDefList.reserve(AllRuntimeLibcalls.size()); + + size_t CallTypeEnumVal = 0; + for (const Record *RuntimeLibcallDef : AllRuntimeLibcalls) { + RuntimeLibcallDefList.emplace_back(RuntimeLibcallDef, CallTypeEnumVal++); + Def2RuntimeLibcall[RuntimeLibcallDef] = &RuntimeLibcallDefList.back(); + } + + for (RuntimeLibcall &LibCall : RuntimeLibcallDefList) + Def2RuntimeLibcall[LibCall.getDef()] = &LibCall; + + ArrayRef AllRuntimeLibcallImplsRaw = + Records.getAllDerivedDefinitions("RuntimeLibcallImpl"); + + SmallVector AllRuntimeLibcallImpls( + AllRuntimeLibcallImplsRaw); + + // Sort by libcall impl name and secondarily by the enum name. + sort(AllRuntimeLibcallImpls, [](const Record *A, const Record *B) { + return std::pair(A->getValueAsString("LibCallFuncName"), A->getName()) < + std::pair(B->getValueAsString("LibCallFuncName"), B->getName()); + }); + + RuntimeLibcallImplDefList.reserve(AllRuntimeLibcallImpls.size()); + + size_t LibCallImplEnumVal = 1; + for (const Record *LibCallImplDef : AllRuntimeLibcallImpls) { + RuntimeLibcallImplDefList.emplace_back(LibCallImplDef, Def2RuntimeLibcall, + LibCallImplEnumVal++); + + const RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); + Def2RuntimeLibcallImpl[LibCallImplDef] = &LibCallImpl; + + if (LibCallImpl.isDefault()) { + const RuntimeLibcall *Provides = LibCallImpl.getProvides(); + if (!Provides) + PrintFatalError(LibCallImplDef->getLoc(), + "default implementations must provide a libcall"); + LibCallToDefaultImpl[Provides] = &LibCallImpl; + } + } +} + +void LibcallPredicateExpander::expand(SetTheory &ST, const Record *Def, + SetTheory::RecSet &Elts) { + assert(Def->isSubClassOf("LibcallImpls")); + + SetTheory::RecSet TmpElts; + + ST.evaluate(Def->getValueInit("MemberList"), TmpElts, Def->getLoc()); + + Elts.insert(TmpElts.begin(), TmpElts.end()); + + AvailabilityPredicate AP(Def->getValueAsDef("AvailabilityPredicate")); + const Record *CCClass = Def->getValueAsOptionalDef("CallingConv"); + + // This is assuming we aren't conditionally applying a calling convention to + // some subsets, and not another, but this doesn't appear to be used. + + for (const Record *LibcallImplDef : TmpElts) { + const RuntimeLibcallImpl *LibcallImpl = + Libcalls.getRuntimeLibcallImpl(LibcallImplDef); + if (!AP.isAlwaysAvailable() || CCClass) { + auto [It, Inserted] = Func2Preds.insert({LibcallImpl, {{}, CCClass}}); + if (!Inserted) { + PrintError( + Def, + "combining nested libcall set predicates currently unhandled: '" + + LibcallImpl->getLibcallFuncName() + "'"); + } + + It->second.first.push_back(AP.getDef()); + It->second.second = CCClass; + } + } +} diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcalls.h b/llvm/utils/TableGen/Basic/RuntimeLibcalls.h new file mode 100644 index 0000000000000..6c9897602b2fa --- /dev/null +++ b/llvm/utils/TableGen/Basic/RuntimeLibcalls.h @@ -0,0 +1,189 @@ +//===------------------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_COMMON_RUNTIMELIBCALLS_H +#define LLVM_UTILS_TABLEGEN_COMMON_RUNTIMELIBCALLS_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" + +namespace llvm { + +class AvailabilityPredicate { + const Record *TheDef; + StringRef PredicateString; + +public: + AvailabilityPredicate(const Record *Def) : TheDef(Def) { + if (TheDef) + PredicateString = TheDef->getValueAsString("Cond"); + } + + const Record *getDef() const { return TheDef; } + + bool isAlwaysAvailable() const { return PredicateString.empty(); } + + void emitIf(raw_ostream &OS) const { + OS << "if (" << PredicateString << ") {\n"; + } + + void emitEndIf(raw_ostream &OS) const { OS << "}\n"; } + + void emitTableVariableNameSuffix(raw_ostream &OS) const { + if (TheDef) + OS << '_' << TheDef->getName(); + } +}; + +class RuntimeLibcalls; +class RuntimeLibcallImpl; + +/// Used to apply predicates to nested sets of libcalls. +struct LibcallPredicateExpander : SetTheory::Expander { + const RuntimeLibcalls &Libcalls; + DenseMap, const Record *>> &Func2Preds; + + LibcallPredicateExpander( + const RuntimeLibcalls &Libcalls, + DenseMap, const Record *>> + &Func2Preds) + : Libcalls(Libcalls), Func2Preds(Func2Preds) {} + + void expand(SetTheory &ST, const Record *Def, + SetTheory::RecSet &Elts) override; +}; + +class RuntimeLibcall { + const Record *TheDef = nullptr; + const size_t EnumVal; + +public: + RuntimeLibcall() = delete; + RuntimeLibcall(const Record *Def, size_t EnumVal) + : TheDef(Def), EnumVal(EnumVal) { + assert(Def); + } + + ~RuntimeLibcall() { assert(TheDef); } + + const Record *getDef() const { return TheDef; } + + StringRef getName() const { return TheDef->getName(); } + + size_t getEnumVal() const { return EnumVal; } + + void emitEnumEntry(raw_ostream &OS) const { + OS << "RTLIB::" << TheDef->getValueAsString("Name"); + } +}; + +class RuntimeLibcallImpl { + const Record *TheDef; + const RuntimeLibcall *Provides = nullptr; + const size_t EnumVal; + +public: + RuntimeLibcallImpl( + const Record *Def, + const DenseMap &ProvideMap, + size_t EnumVal) + : TheDef(Def), EnumVal(EnumVal) { + if (const Record *ProvidesDef = Def->getValueAsDef("Provides")) + Provides = ProvideMap.lookup(ProvidesDef); + } + + ~RuntimeLibcallImpl() = default; + + const Record *getDef() const { return TheDef; } + + StringRef getName() const { return TheDef->getName(); } + + size_t getEnumVal() const { return EnumVal; } + + const RuntimeLibcall *getProvides() const { return Provides; } + + StringRef getLibcallFuncName() const { + return TheDef->getValueAsString("LibCallFuncName"); + } + + const Record *getCallingConv() const { + return TheDef->getValueAsOptionalDef("CallingConv"); + } + + void emitQuotedLibcallFuncName(raw_ostream &OS) const { + OS << '\"' << getLibcallFuncName() << '\"'; + } + + bool isDefault() const { return TheDef->getValueAsBit("IsDefault"); } + + void emitEnumEntry(raw_ostream &OS) const { + OS << "RTLIB::impl_" << this->getName(); + } + + void emitSetImplCall(raw_ostream &OS) const { + OS << "setLibcallImpl("; + Provides->emitEnumEntry(OS); + OS << ", "; + emitEnumEntry(OS); + OS << "); // " << getLibcallFuncName() << '\n'; + } + + void emitTableEntry(raw_ostream &OS) const { + OS << '{'; + Provides->emitEnumEntry(OS); + OS << ", "; + emitEnumEntry(OS); + OS << "}, // " << getLibcallFuncName() << '\n'; + } + + void emitSetCallingConv(raw_ostream &OS) const {} +}; + +struct LibcallsWithCC { + std::vector LibcallImpls; + const Record *CallingConv = nullptr; +}; + +class RuntimeLibcalls { +private: + DenseMap Def2RuntimeLibcall; + DenseMap Def2RuntimeLibcallImpl; + + std::vector RuntimeLibcallDefList; + std::vector RuntimeLibcallImplDefList; + + DenseMap + LibCallToDefaultImpl; + +public: + RuntimeLibcalls(const RecordKeeper &Records); + + ArrayRef getRuntimeLibcallDefList() const { + return RuntimeLibcallDefList; + } + + ArrayRef getRuntimeLibcallImplDefList() const { + return RuntimeLibcallImplDefList; + } + + const RuntimeLibcall *getRuntimeLibcall(const Record *Def) const { + return Def2RuntimeLibcall.lookup(Def); + } + + const RuntimeLibcallImpl *getRuntimeLibcallImpl(const Record *Def) const { + return Def2RuntimeLibcallImpl.lookup(Def); + } +}; + +} // namespace llvm + +#endif // LLVM_UTILS_TABLEGEN_COMMON_RUNTIMELIBCALLS_H diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index 6a36f471678bf..7aca87a63d0a2 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -8,6 +8,8 @@ #define DEBUG_TYPE "runtime-libcall-emitter" +#include "RuntimeLibcalls.h" + #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" @@ -65,160 +67,12 @@ template <> struct DenseMapInfo { return LHS == RHS; } }; -} // namespace llvm - -namespace { - -class AvailabilityPredicate { - const Record *TheDef; - StringRef PredicateString; - -public: - AvailabilityPredicate(const Record *Def) : TheDef(Def) { - if (TheDef) - PredicateString = TheDef->getValueAsString("Cond"); - } - - const Record *getDef() const { return TheDef; } - - bool isAlwaysAvailable() const { return PredicateString.empty(); } - - void emitIf(raw_ostream &OS) const { - OS << "if (" << PredicateString << ") {\n"; - } - - void emitEndIf(raw_ostream &OS) const { OS << "}\n"; } - - void emitTableVariableNameSuffix(raw_ostream &OS) const { - if (TheDef) - OS << '_' << TheDef->getName(); - } -}; - -class RuntimeLibcallEmitter; -class RuntimeLibcallImpl; - -/// Used to apply predicates to nested sets of libcalls. -struct LibcallPredicateExpander : SetTheory::Expander { - const RuntimeLibcallEmitter &LibcallEmitter; - DenseMap, const Record *>> &Func2Preds; - - LibcallPredicateExpander( - const RuntimeLibcallEmitter &LibcallEmitter, - DenseMap, const Record *>> - &Func2Preds) - : LibcallEmitter(LibcallEmitter), Func2Preds(Func2Preds) {} - - void expand(SetTheory &ST, const Record *Def, - SetTheory::RecSet &Elts) override; -}; - -class RuntimeLibcall { - const Record *TheDef = nullptr; - const size_t EnumVal; - -public: - RuntimeLibcall() = delete; - RuntimeLibcall(const Record *Def, size_t EnumVal) - : TheDef(Def), EnumVal(EnumVal) { - assert(Def); - } - - ~RuntimeLibcall() { assert(TheDef); } - - const Record *getDef() const { return TheDef; } - - StringRef getName() const { return TheDef->getName(); } - - size_t getEnumVal() const { return EnumVal; } - - void emitEnumEntry(raw_ostream &OS) const { - OS << "RTLIB::" << TheDef->getValueAsString("Name"); - } -}; - -class RuntimeLibcallImpl { - const Record *TheDef; - const RuntimeLibcall *Provides = nullptr; - const size_t EnumVal; - -public: - RuntimeLibcallImpl( - const Record *Def, - const DenseMap &ProvideMap, - size_t EnumVal) - : TheDef(Def), EnumVal(EnumVal) { - if (const Record *ProvidesDef = Def->getValueAsDef("Provides")) - Provides = ProvideMap.lookup(ProvidesDef); - } - - ~RuntimeLibcallImpl() = default; - - const Record *getDef() const { return TheDef; } - - StringRef getName() const { return TheDef->getName(); } - - size_t getEnumVal() const { return EnumVal; } - - const RuntimeLibcall *getProvides() const { return Provides; } - - StringRef getLibcallFuncName() const { - return TheDef->getValueAsString("LibCallFuncName"); - } - - const Record *getCallingConv() const { - return TheDef->getValueAsOptionalDef("CallingConv"); - } - - void emitQuotedLibcallFuncName(raw_ostream &OS) const { - OS << '\"' << getLibcallFuncName() << '\"'; - } - - bool isDefault() const { return TheDef->getValueAsBit("IsDefault"); } - - void emitEnumEntry(raw_ostream &OS) const { - OS << "RTLIB::impl_" << this->getName(); - } - - void emitSetImplCall(raw_ostream &OS) const { - OS << "setLibcallImpl("; - Provides->emitEnumEntry(OS); - OS << ", "; - emitEnumEntry(OS); - OS << "); // " << getLibcallFuncName() << '\n'; - } - - void emitTableEntry(raw_ostream &OS) const { - OS << '{'; - Provides->emitEnumEntry(OS); - OS << ", "; - emitEnumEntry(OS); - OS << "}, // " << getLibcallFuncName() << '\n'; - } - - void emitSetCallingConv(raw_ostream &OS) const {} -}; - -struct LibcallsWithCC { - std::vector LibcallImpls; - const Record *CallingConv = nullptr; -}; class RuntimeLibcallEmitter { private: const RecordKeeper &Records; - DenseMap Def2RuntimeLibcall; - DenseMap Def2RuntimeLibcallImpl; + RuntimeLibcalls Libcalls; - std::vector RuntimeLibcallDefList; - std::vector RuntimeLibcallImplDefList; - - DenseMap - LibCallToDefaultImpl; - -private: void emitGetRuntimeLibcallEnum(raw_ostream &OS) const; void emitNameMatchHashTable(raw_ostream &OS, @@ -229,61 +83,7 @@ class RuntimeLibcallEmitter { void emitSystemRuntimeLibrarySetCalls(raw_ostream &OS) const; public: - RuntimeLibcallEmitter(const RecordKeeper &R) : Records(R) { - - ArrayRef AllRuntimeLibcalls = - Records.getAllDerivedDefinitions("RuntimeLibcall"); - - RuntimeLibcallDefList.reserve(AllRuntimeLibcalls.size()); - - size_t CallTypeEnumVal = 0; - for (const Record *RuntimeLibcallDef : AllRuntimeLibcalls) { - RuntimeLibcallDefList.emplace_back(RuntimeLibcallDef, CallTypeEnumVal++); - Def2RuntimeLibcall[RuntimeLibcallDef] = &RuntimeLibcallDefList.back(); - } - - for (RuntimeLibcall &LibCall : RuntimeLibcallDefList) - Def2RuntimeLibcall[LibCall.getDef()] = &LibCall; - - ArrayRef AllRuntimeLibcallImplsRaw = - Records.getAllDerivedDefinitions("RuntimeLibcallImpl"); - - SmallVector AllRuntimeLibcallImpls( - AllRuntimeLibcallImplsRaw); - - // Sort by libcall impl name and secondarily by the enum name. - sort(AllRuntimeLibcallImpls, [](const Record *A, const Record *B) { - return std::pair(A->getValueAsString("LibCallFuncName"), A->getName()) < - std::pair(B->getValueAsString("LibCallFuncName"), B->getName()); - }); - - RuntimeLibcallImplDefList.reserve(AllRuntimeLibcallImpls.size()); - - size_t LibCallImplEnumVal = 1; - for (const Record *LibCallImplDef : AllRuntimeLibcallImpls) { - RuntimeLibcallImplDefList.emplace_back(LibCallImplDef, Def2RuntimeLibcall, - LibCallImplEnumVal++); - - const RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); - Def2RuntimeLibcallImpl[LibCallImplDef] = &LibCallImpl; - - if (LibCallImpl.isDefault()) { - const RuntimeLibcall *Provides = LibCallImpl.getProvides(); - if (!Provides) - PrintFatalError(LibCallImplDef->getLoc(), - "default implementations must provide a libcall"); - LibCallToDefaultImpl[Provides] = &LibCallImpl; - } - } - } - - const RuntimeLibcall *getRuntimeLibcall(const Record *Def) const { - return Def2RuntimeLibcall.lookup(Def); - } - - const RuntimeLibcallImpl *getRuntimeLibcallImpl(const Record *Def) const { - return Def2RuntimeLibcallImpl.lookup(Def); - } + RuntimeLibcallEmitter(const RecordKeeper &R) : Records(R), Libcalls(R) {} void run(raw_ostream &OS); }; @@ -297,24 +97,25 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { "namespace RTLIB {\n" "enum Libcall : unsigned short {\n"; - for (const RuntimeLibcall &LibCall : RuntimeLibcallDefList) { + for (const RuntimeLibcall &LibCall : Libcalls.getRuntimeLibcallDefList()) { StringRef Name = LibCall.getName(); OS << " " << Name << " = " << LibCall.getEnumVal() << ",\n"; } - OS << " UNKNOWN_LIBCALL = " << RuntimeLibcallDefList.size() + OS << " UNKNOWN_LIBCALL = " << Libcalls.getRuntimeLibcallDefList().size() << "\n};\n\n" "enum LibcallImpl : unsigned short {\n" " Unsupported = 0,\n"; - for (const RuntimeLibcallImpl &LibCall : RuntimeLibcallImplDefList) { + for (const RuntimeLibcallImpl &LibCall : + Libcalls.getRuntimeLibcallImplDefList()) { OS << " impl_" << LibCall.getName() << " = " << LibCall.getEnumVal() << ", // " << LibCall.getLibcallFuncName() << '\n'; } OS << "};\n" << "constexpr size_t NumLibcallImpls = " - << RuntimeLibcallImplDefList.size() + 1 + << Libcalls.getRuntimeLibcallImplDefList().size() + 1 << ";\n" "} // End namespace RTLIB\n" "} // End namespace llvm\n"; @@ -394,6 +195,8 @@ constructPerfectHashTable(ArrayRef Keywords, /// Generate hash table based lookup by name. void RuntimeLibcallEmitter::emitNameMatchHashTable( raw_ostream &OS, StringToOffsetTable &OffsetTable) const { + ArrayRef RuntimeLibcallImplDefList = + Libcalls.getRuntimeLibcallImplDefList(); std::vector Hashes(RuntimeLibcallImplDefList.size()); std::vector TableValues(RuntimeLibcallImplDefList.size()); DenseSet SeenFuncNames; @@ -495,7 +298,8 @@ void RuntimeLibcallEmitter::emitGetInitRuntimeLibcallNames( { IfDefEmitter IfDef(OS, "GET_INIT_RUNTIME_LIBCALL_NAMES"); - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) Table.GetOrAddStringOffset(LibCallImpl.getLibcallFuncName()); Table.EmitStringTableDef(OS, "RuntimeLibcallImplNameTable"); @@ -505,7 +309,8 @@ const uint16_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameOffsetTable[] = { OS << formatv(" {}, // {}\n", Table.GetStringOffset(""), ""); // Unsupported entry - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) { StringRef ImplName = LibCallImpl.getLibcallFuncName(); OS << formatv(" {}, // {}\n", Table.GetStringOffset(ImplName), ImplName); } @@ -516,7 +321,8 @@ const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { )"; OS << " 0,\n"; - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) OS << " " << LibCallImpl.getLibcallFuncName().size() << ",\n"; OS << "};\n\n"; @@ -525,7 +331,8 @@ const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { "ImplToLibcall[RTLIB::NumLibcallImpls] = {\n" " RTLIB::UNKNOWN_LIBCALL, // RTLIB::Unsupported\n"; - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + for (const RuntimeLibcallImpl &LibCallImpl : + Libcalls.getRuntimeLibcallImplDefList()) { const RuntimeLibcall *Provides = LibCallImpl.getProvides(); OS << " "; Provides->emitEnumEntry(OS); @@ -533,6 +340,7 @@ const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { LibCallImpl.emitEnumEntry(OS); OS << '\n'; } + OS << "};\n\n"; } @@ -544,11 +352,8 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( OS << "void llvm::RTLIB::RuntimeLibcallsInfo::setTargetRuntimeLibcallSets(" "const llvm::Triple &TT, ExceptionHandling ExceptionModel, " "FloatABI::ABIType FloatABI, EABI EABIVersion, " - "StringRef ABIName) {\n" - " struct LibcallImplPair {\n" - " RTLIB::Libcall Func;\n" - " RTLIB::LibcallImpl Impl;\n" - " };\n"; + "StringRef ABIName) {\n"; + ArrayRef AllLibs = Records.getAllDerivedDefinitions("SystemRuntimeLibrary"); @@ -579,7 +384,7 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( std::pair, const Record *>> Func2Preds; Sets.addExpander("LibcallImpls", std::make_unique( - *this, Func2Preds)); + Libcalls, Func2Preds)); const SetTheory::RecVec *Elements = Sets.expand(R->getValueAsDef("MemberList")); @@ -592,11 +397,12 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( constexpr unsigned BitsPerStorageElt = 64; DenseMap Pred2Funcs; - SmallVector BitsetValues( - divideCeil(RuntimeLibcallImplDefList.size() + 1, BitsPerStorageElt)); + SmallVector BitsetValues(divideCeil( + Libcalls.getRuntimeLibcallImplDefList().size() + 1, BitsPerStorageElt)); for (const Record *Elt : *Elements) { - const RuntimeLibcallImpl *LibCallImpl = getRuntimeLibcallImpl(Elt); + const RuntimeLibcallImpl *LibCallImpl = + Libcalls.getRuntimeLibcallImpl(Elt); if (!LibCallImpl) { PrintError(R, "entry for SystemLibrary is not a RuntimeLibcallImpl"); PrintNote(Elt->getLoc(), "invalid entry `" + Elt->getName() + "`"); @@ -703,7 +509,7 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( Funcs.erase(UniqueI, Funcs.end()); OS << indent(IndentDepth + 2) - << "static const LibcallImplPair LibraryCalls"; + << "static const RTLIB::LibcallImpl LibraryCalls"; SubsetPredicate.emitTableVariableNameSuffix(OS); if (FuncsWithCC.CallingConv) OS << '_' << FuncsWithCC.CallingConv->getName(); @@ -711,18 +517,18 @@ void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( OS << "[] = {\n"; for (const RuntimeLibcallImpl *LibCallImpl : Funcs) { OS << indent(IndentDepth + 6); - LibCallImpl->emitTableEntry(OS); + LibCallImpl->emitEnumEntry(OS); + OS << ", // " << LibCallImpl->getLibcallFuncName() << '\n'; } OS << indent(IndentDepth + 2) << "};\n\n" << indent(IndentDepth + 2) - << "for (const auto [Func, Impl] : LibraryCalls"; + << "for (const RTLIB::LibcallImpl Impl : LibraryCalls"; SubsetPredicate.emitTableVariableNameSuffix(OS); if (FuncsWithCC.CallingConv) OS << '_' << FuncsWithCC.CallingConv->getName(); - OS << ") {\n" - << indent(IndentDepth + 4) << "setLibcallImpl(Func, Impl);\n"; + OS << ") {\n" << indent(IndentDepth + 4) << "setAvailable(Impl);\n"; if (FuncsWithCC.CallingConv) { StringRef CCEnum = @@ -759,44 +565,10 @@ void RuntimeLibcallEmitter::run(raw_ostream &OS) { emitGetInitRuntimeLibcallNames(OS); { - IfDefEmitter IfDef(OS, "GET_SET_TARGET_RUNTIME_LIBCALL_SETS"); + IfDefEmitter IfDef(OS, "GET_RUNTIME_LIBCALLS_INFO"); emitSystemRuntimeLibrarySetCalls(OS); } } -void LibcallPredicateExpander::expand(SetTheory &ST, const Record *Def, - SetTheory::RecSet &Elts) { - assert(Def->isSubClassOf("LibcallImpls")); - - SetTheory::RecSet TmpElts; - - ST.evaluate(Def->getValueInit("MemberList"), TmpElts, Def->getLoc()); - - Elts.insert(TmpElts.begin(), TmpElts.end()); - - AvailabilityPredicate AP(Def->getValueAsDef("AvailabilityPredicate")); - const Record *CCClass = Def->getValueAsOptionalDef("CallingConv"); - - // This is assuming we aren't conditionally applying a calling convention to - // some subsets, and not another, but this doesn't appear to be used. - - for (const Record *LibcallImplDef : TmpElts) { - const RuntimeLibcallImpl *LibcallImpl = - LibcallEmitter.getRuntimeLibcallImpl(LibcallImplDef); - if (!AP.isAlwaysAvailable() || CCClass) { - auto [It, Inserted] = Func2Preds.insert({LibcallImpl, {{}, CCClass}}); - if (!Inserted) { - PrintError( - Def, - "combining nested libcall set predicates currently unhandled: '" + - LibcallImpl->getLibcallFuncName() + "'"); - } - - It->second.first.push_back(AP.getDef()); - It->second.second = CCClass; - } - } -} - static TableGen::Emitter::OptClass X("gen-runtime-libcalls", "Generate RuntimeLibcalls"); diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp index 64f03dae83e7d..dd18d29e6c676 100644 --- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp +++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp @@ -10,6 +10,7 @@ #include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo. #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/TableGen/CodeGenHelpers.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" @@ -129,9 +130,8 @@ SDNodeInfoEmitter::SDNodeInfoEmitter(const RecordKeeper &RK) } void SDNodeInfoEmitter::emitEnum(raw_ostream &OS) const { - OS << "#ifdef GET_SDNODE_ENUM\n"; - OS << "#undef GET_SDNODE_ENUM\n\n"; - OS << "namespace llvm::" << TargetSDNodeNamespace << " {\n\n"; + IfDefEmitter IfDef(OS, "GET_SDNODE_ENUM"); + NamespaceEmitter NS(OS, "llvm::" + TargetSDNodeNamespace); if (!NodesByName.empty()) { StringRef FirstName = NodesByName.begin()->first; @@ -145,14 +145,11 @@ void SDNodeInfoEmitter::emitEnum(raw_ostream &OS) const { OS << "};\n\n"; OS << "static constexpr unsigned GENERATED_OPCODE_END = " << LastName - << " + 1;\n\n"; + << " + 1;\n"; } else { OS << "static constexpr unsigned GENERATED_OPCODE_END = " - "ISD::BUILTIN_OP_END;\n\n"; + "ISD::BUILTIN_OP_END;\n"; } - - OS << "} // namespace llvm::" << TargetSDNodeNamespace << "\n\n"; - OS << "#endif // GET_SDNODE_ENUM\n\n"; } std::vector SDNodeInfoEmitter::emitNodeNames(raw_ostream &OS) const { @@ -324,9 +321,8 @@ static void emitDesc(raw_ostream &OS, StringRef EnumName, void SDNodeInfoEmitter::emitDescs(raw_ostream &OS) const { StringRef TargetName = Target.getName(); - OS << "#ifdef GET_SDNODE_DESC\n"; - OS << "#undef GET_SDNODE_DESC\n\n"; - OS << "namespace llvm {\n"; + IfDefEmitter IfDef(OS, "GET_SDNODE_DESC"); + NamespaceEmitter NS(OS, "llvm"); std::vector NameOffsets = emitNodeNames(OS); std::vector> ConstraintOffsetsAndCounts = @@ -343,11 +339,8 @@ void SDNodeInfoEmitter::emitDescs(raw_ostream &OS) const { OS << formatv("static const SDNodeInfo {0}GenSDNodeInfo(\n" " /*NumOpcodes=*/{1}, {0}SDNodeDescs,\n" - " {0}SDNodeNames, {0}SDTypeConstraints);\n\n", + " {0}SDNodeNames, {0}SDTypeConstraints);\n", TargetName, NodesByName.size()); - - OS << "} // namespace llvm\n\n"; - OS << "#endif // GET_SDNODE_DESC\n\n"; } void SDNodeInfoEmitter::run(raw_ostream &OS) const { diff --git a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn index 393309ee39bfe..a261f2866be47 100644 --- a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn @@ -12,7 +12,7 @@ static_library("Passes") { "//llvm/utils/gn/build/libs/pthread", ] sources = [ - "ADRRelaxationPass.cpp", + "AArch64RelaxationPass.cpp", "Aligner.cpp", "AllocCombiner.cpp", "AsmDump.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn index 2f84999621e1b..3c3fdf7e16885 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn @@ -82,6 +82,7 @@ static_library("bugprone") { "SmartPtrArrayMismatchCheck.cpp", "SpuriouslyWakeUpFunctionsCheck.cpp", "StandaloneEmptyCheck.cpp", + "StdNamespaceModificationCheck.cpp", "StringConstructorCheck.cpp", "StringIntegerAssignmentCheck.cpp", "StringLiteralWithEmbeddedNulCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn index ec642b6afad66..1eae289143b5b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn @@ -16,7 +16,6 @@ static_library("cert") { ] sources = [ "CERTTidyModule.cpp", - "DontModifyStdNamespaceCheck.cpp", "FloatLoopCounter.cpp", "LimitedRandomnessCheck.cpp", "MutatingCopyCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn index 065fc6cdd74a3..bd8d9610c2a4a 100644 --- a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn @@ -17,6 +17,7 @@ unittest("ClangCodeGenTests") { "BufferSourceTest.cpp", "CheckTargetFeaturesTest.cpp", "CodeGenExternalTest.cpp", + "DemangleTrapReasonInDebugInfo.cpp", "TBAAMetadataTest.cpp", ] } diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn index 9848efef70568..fa99fa8649caf 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn @@ -1,10 +1,16 @@ static_library("CPlusPlus") { output_name = "lldbPluginCPPRuntime" - configs += [ "//llvm/utils/gn/build:lldb_code" ] + configs += [ + "//llvm/utils/gn/build:clang_code", + "//llvm/utils/gn/build:lldb_code", + ] deps = [ "//lldb/source/Core", "//lldb/source/Symbol", "//lldb/source/Target", ] - sources = [ "CPPLanguageRuntime.cpp" ] + sources = [ + "CPPLanguageRuntime.cpp", + "VerboseTrapFrameRecognizer.cpp", + ] } diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn index a863baf912051..783eb96283596 100644 --- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn @@ -105,6 +105,5 @@ static_library("Target") { "UnixSignals.cpp", "UnwindAssembly.cpp", "UnwindLLDB.cpp", - "VerboseTrapFrameRecognizer.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn index 1a890f6733597..a234d2be67f66 100644 --- a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn @@ -12,7 +12,6 @@ static_library("BinaryFormat") { "ELF.cpp", "MachO.cpp", "Magic.cpp", - "Minidump.cpp", "MsgPackDocument.cpp", "MsgPackDocumentYAML.cpp", "MsgPackReader.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 444670212cafb..eb41df208941a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -88,6 +88,7 @@ static_library("CodeGen") { "LatencyPriorityQueue.cpp", "LazyMachineBlockFrequencyInfo.cpp", "LexicalScopes.cpp", + "LibcallLoweringInfo.cpp", "LiveDebugValues/InstrRefBasedImpl.cpp", "LiveDebugValues/LiveDebugValues.cpp", "LiveDebugValues/VarLocBasedImpl.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index a1f5b475e2096..ad72c0069237d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -151,6 +151,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVMoveMerger.cpp", "RISCVOptWInstrs.cpp", "RISCVPostRAExpandPseudoInsts.cpp", + "RISCVPromoteConstant.cpp", "RISCVPushPopOptimizer.cpp", "RISCVRedundantCopyElimination.cpp", "RISCVRegisterInfo.cpp", diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn index 43916cef756ff..918132b38b6ed 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/Basic/BUILD.gn @@ -10,6 +10,7 @@ source_set("Basic") { "DirectiveEmitter.cpp", "IntrinsicEmitter.cpp", "RISCVTargetDefEmitter.cpp", + "RuntimeLibcalls.cpp", "RuntimeLibcallsEmitter.cpp", "SDNodeProperties.cpp", "TableGen.cpp", diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 10f0cc254ea97..80bc0e5986e51 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -949,7 +949,7 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">, }]; string llvmBuilder = [{ - auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs( + auto [id, args] = NVVM::MBarrierTestWaitOp::getIntrinsicIDAndArgs( *op, moduleTranslation, builder); $res = createIntrinsicCall(builder, id, args); }]; diff --git a/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir new file mode 100644 index 0000000000000..9bb3b082777fd --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir @@ -0,0 +1,116 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) { + // CHECK-LABEL: define void @cp_async_mbarrier_arrive(ptr addrspace(3) %0, ptr %1) { + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %1) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %1) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %0) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr + nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr + nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> + nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> + llvm.return +} + +llvm.func @mbarrier_init_generic(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_init_generic(ptr %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init(ptr %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + nvvm.mbarrier.init %barrier, %count : !llvm.ptr, i32 + llvm.return +} + +llvm.func @mbarrier_init_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_init_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + nvvm.mbarrier.init %barrier, %count : !llvm.ptr<3>, i32 + llvm.return +} + +llvm.func @mbarrier_inval_generic(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_inval_generic(ptr %0) { + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval(ptr %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.mbarrier.inval %barrier : !llvm.ptr + llvm.return +} + +llvm.func @mbarrier_inval_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_inval_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.mbarrier.inval %barrier : !llvm.ptr<3> + llvm.return +} + +llvm.func @mbarrier_arrive(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_arrive(ptr %0) { + // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive(ptr %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_arrive_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_nocomplete(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_arrive_nocomplete(ptr %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr, i32 -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_nocomplete_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_arrive_nocomplete_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr<3>, i32 -> i64 + llvm.return +} + +llvm.func @mbarrier_test_wait(%barrier: !llvm.ptr, %token : i64) -> i1 { + // CHECK-LABEL: define i1 @mbarrier_test_wait(ptr %0, i64 %1) { + // CHECK-NEXT: %3 = call i1 @llvm.nvvm.mbarrier.test.wait(ptr %0, i64 %1) + // CHECK-NEXT: ret i1 %3 + // CHECK-NEXT: } + %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr, i64 -> i1 + llvm.return %isComplete : i1 +} + +llvm.func @mbarrier_test_wait_shared(%barrier: !llvm.ptr<3>, %token : i64) { + // CHECK-LABEL: define void @mbarrier_test_wait_shared(ptr addrspace(3) %0, i64 %1) { + // CHECK-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %0, i64 %1) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr<3>, i64 -> i1 + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 3fc09f371a347..1ec55408e97a5 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -531,19 +531,6 @@ llvm.func @async_cp_zfill(%dst: !llvm.ptr<3>, %src: !llvm.ptr<1>, %cpSize: i32) llvm.return } -// CHECK-LABEL: @cp_async_mbarrier_arrive -llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) { - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> - llvm.return -} - // CHECK-LABEL: @llvm_nvvm_setmaxregister llvm.func @llvm_nvvm_setmaxregister() { // CHECK: call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 256) diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index cb08397c201f2..f01d3decd9a1c 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -932,13 +932,14 @@ static void runIslScheduleOptimizer( POLLY_DEBUG(dbgs() << "Schedule optimizer calculation exceeds ISL quota\n"); return; } else if (isl_ctx_last_error(Ctx) != isl_error_none) { - const char *File = isl_ctx_last_error_file(Ctx); - int Line = isl_ctx_last_error_line(Ctx); - const char *Msg = isl_ctx_last_error_msg(Ctx); - POLLY_DEBUG( - dbgs() - << "ISL reported an error during the computation of a new schedule at " - << File << ":" << Line << ": " << Msg); + POLLY_DEBUG({ + const char *File = isl_ctx_last_error_file(Ctx); + int Line = isl_ctx_last_error_line(Ctx); + const char *Msg = isl_ctx_last_error_msg(Ctx); + dbgs() << "ISL reported an error during the computation of a new " + "schedule at " + << File << ":" << Line << ": " << Msg; + }); isl_ctx_reset_error(Ctx); return; } else if (Schedule.is_null()) { diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 8d225d63cdf3e..b65fe64acdea0 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1805,6 +1805,7 @@ libc_support_library( ":__support_cpp_optional", ":__support_macros_attributes", ":__support_macros_config", + ":types_wchar_t", ":types_wint_t", ], ) @@ -1859,6 +1860,7 @@ libc_function( hdrs = ["src/ctype/isalnum.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1869,6 +1871,7 @@ libc_function( hdrs = ["src/ctype/isalpha.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1909,6 +1912,7 @@ libc_function( hdrs = ["src/ctype/isdigit.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1919,6 +1923,7 @@ libc_function( hdrs = ["src/ctype/isgraph.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1929,6 +1934,7 @@ libc_function( hdrs = ["src/ctype/islower.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1949,6 +1955,7 @@ libc_function( hdrs = ["src/ctype/ispunct.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1959,6 +1966,7 @@ libc_function( hdrs = ["src/ctype/isspace.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1969,6 +1977,7 @@ libc_function( hdrs = ["src/ctype/isupper.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1979,6 +1988,7 @@ libc_function( hdrs = ["src/ctype/isxdigit.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -1999,6 +2009,7 @@ libc_function( hdrs = ["src/ctype/tolower.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], ) @@ -2009,6 +2020,7 @@ libc_function( hdrs = ["src/ctype/toupper.h"], deps = [ ":__support_common", + ":__support_cpp_limits", ":__support_ctype_utils", ], )