diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp index a2d3d3ff1512d..a7b74944690b4 100644 --- a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.cpp @@ -73,7 +73,9 @@ CoroutineHostileRAIICheck::CoroutineHostileRAIICheck(StringRef Name, RAIITypesList(utils::options::parseStringList( Options.get("RAIITypesList", "std::lock_guard;std::scoped_lock"))), AllowedAwaitablesList(utils::options::parseStringList( - Options.get("AllowedAwaitablesList", ""))) {} + Options.get("AllowedAwaitablesList", ""))), + AllowedCallees( + utils::options::parseStringList(Options.get("AllowedCallees", ""))) {} void CoroutineHostileRAIICheck::registerMatchers(MatchFinder *Finder) { // A suspension happens with co_await or co_yield. @@ -81,7 +83,9 @@ void CoroutineHostileRAIICheck::registerMatchers(MatchFinder *Finder) { hasAttr(attr::Kind::ScopedLockable))))) .bind("scoped-lockable"); auto OtherRAII = varDecl(typeWithNameIn(RAIITypesList)).bind("raii"); - auto AllowedSuspend = awaitable(typeWithNameIn(AllowedAwaitablesList)); + auto AllowedSuspend = awaitable( + anyOf(typeWithNameIn(AllowedAwaitablesList), + callExpr(callee(functionDecl(hasAnyName(AllowedCallees)))))); Finder->addMatcher( expr(anyOf(coawaitExpr(unless(AllowedSuspend)), coyieldExpr()), forEachPrevStmt( @@ -111,5 +115,7 @@ void CoroutineHostileRAIICheck::storeOptions( utils::options::serializeStringList(RAIITypesList)); Options.store(Opts, "SafeAwaitableList", utils::options::serializeStringList(AllowedAwaitablesList)); + Options.store(Opts, "AllowedCallees", + utils::options::serializeStringList(AllowedCallees)); } } // namespace clang::tidy::misc diff --git a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h index 768b62ef07f90..12ad1b1e0e220 100644 --- 
a/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h +++ b/clang-tools-extra/clang-tidy/misc/CoroutineHostileRAIICheck.h @@ -46,6 +46,9 @@ class CoroutineHostileRAIICheck : public ClangTidyCheck { // List of fully qualified awaitable types which are considered safe to // co_await. std::vector AllowedAwaitablesList; + // List of callees whose return values are considered safe to directly + // co_await. + std::vector AllowedCallees; }; } // namespace clang::tidy::misc diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 8637a9ab6d9f6..f25c4cacdacb7 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -423,6 +423,11 @@ Changes in existing checks positives on return of non-const pointer and fix false positives on pointer-to-member operator. +- Improved :doc:`misc-coroutine-hostile-raii + ` check by adding the option + `AllowedCallees`, that allows exempting safely awaitable callees from the + check. + - Improved :doc:`misc-header-include-cycle ` check performance. diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst index 0b054e4e20bd6..be80d39e4abf9 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/coroutine-hostile-raii.rst @@ -81,3 +81,23 @@ Options Eg: `my::safe::awaitable;other::awaitable` Default is an empty string. +.. option:: AllowedCallees + + A semicolon-separated list of callee function names which can + be safely awaited while having hostile RAII objects in scope. + Example usage: + + .. code-block:: c++ + + // Consider option AllowedCallees = "noop" + task noop() { co_return; } + + task coro() { + // This persists across the co_await but is not flagged + // because the awaitable is considered safe to await on. 
+ const std::lock_guard l(&mu_); + co_await noop(); + } + + Eg: `my::safe::await;other::await` + Default is an empty string. diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp index c23c355dac1b2..ec6ddec56e1f2 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/coroutine-hostile-raii.cpp @@ -1,7 +1,8 @@ // RUN: %check_clang_tidy -std=c++20 %s misc-coroutine-hostile-raii %t \ // RUN: -config="{CheckOptions: {\ // RUN: misc-coroutine-hostile-raii.RAIITypesList: 'my::Mutex; ::my::other::Mutex', \ -// RUN: misc-coroutine-hostile-raii.AllowedAwaitablesList: 'safe::awaitable; ::transformable::awaitable' \ +// RUN: misc-coroutine-hostile-raii.AllowedAwaitablesList: 'safe::awaitable; ::transformable::awaitable', \ +// RUN: misc-coroutine-hostile-raii.AllowedCallees: 'safe::AwaitFunc; ::safe::Obj::AwaitMethod' \ // RUN: }}" namespace std { @@ -145,12 +146,18 @@ namespace safe { void await_suspend(std::coroutine_handle<>) noexcept {} void await_resume() noexcept {} }; + std::suspend_always AwaitFunc(); + struct Obj { + std::suspend_always AwaitMethod(); + }; } // namespace safe ReturnObject RAIISafeSuspendTest() { absl::Mutex a; co_await safe::awaitable{}; using other = safe::awaitable; co_await other{}; + co_await safe::AwaitFunc(); + co_await safe::Obj().AwaitMethod(); } // ================================================================================ diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 3288f5b12c77e..6c7e3d055456a 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -465,6 +465,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return createCompare(ptr.getLoc(), cir::CmpOpKind::eq, 
ptr, nullPtr); } + mlir::Value createAddrSpaceCast(mlir::Location loc, mlir::Value src, + mlir::Type newTy) { + return createCast(loc, cir::CastKind::address_space, src, newTy); + } + + mlir::Value createAddrSpaceCast(mlir::Value src, mlir::Type newTy) { + return createAddrSpaceCast(src.getLoc(), src, newTy); + } + //===--------------------------------------------------------------------===// // Binary Operators //===--------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.h b/clang/include/clang/CIR/Dialect/IR/CIRTypes.h index 45f646f1c9dfa..939e774a6ea67 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.h +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.h @@ -13,7 +13,9 @@ #ifndef CLANG_CIR_DIALECT_IR_CIRTYPES_H #define CLANG_CIR_DIALECT_IR_CIRTYPES_H +#include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/Types.h" #include "mlir/Interfaces/DataLayoutInterfaces.h" #include "clang/Basic/AddressSpaces.h" @@ -38,6 +40,15 @@ bool isValidFundamentalIntWidth(unsigned width); /// void, or abstract types. 
bool isSized(mlir::Type ty); +//===----------------------------------------------------------------------===// +// AddressSpace helpers +//===----------------------------------------------------------------------===// +cir::TargetAddressSpaceAttr toCIRTargetAddressSpace(mlir::MLIRContext &context, + clang::LangAS langAS); + +bool isMatchingAddressSpace(cir::TargetAddressSpaceAttr cirAS, + clang::LangAS as); + } // namespace cir //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index e147d2ba6087e..51787d914e1ec 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -147,6 +147,13 @@ class CompilerInvocationBase { } /// @} + /// Visitation. + /// @{ + /// Visits paths stored in the invocation. The callback may return true to + /// short-circuit the visitation, or return false to continue visiting. + void visitPaths(llvm::function_ref Callback) const; + /// @} + /// Command line generation. /// @{ using StringAllocator = llvm::function_ref; @@ -181,6 +188,12 @@ class CompilerInvocationBase { /// This is a (less-efficient) wrapper over generateCC1CommandLine(). std::vector getCC1CommandLine() const; +protected: + /// Visits paths stored in the invocation. This is generally unsafe to call + /// directly, and each sub-class needs to ensure calling this doesn't violate + /// its invariants. + void visitPathsImpl(llvm::function_ref Predicate); + private: /// Generate command line options from DiagnosticOptions. 
static void GenerateDiagnosticArgs(const DiagnosticOptions &Opts, diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index c919a53ae089e..ba7da56cb9fce 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -241,6 +241,8 @@ class FrontendInputFile { /// Whether we're dealing with a 'system' input (vs. a 'user' input). bool IsSystem = false; + friend class CompilerInvocationBase; + public: FrontendInputFile() = default; FrontendInputFile(StringRef File, InputKind Kind, bool IsSystem = false) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index c18f158525e85..0f868c1c7c5da 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -151,14 +151,14 @@ struct UnsafeQualTypeDenseMapInfo { }; /// An ID number that refers to a macro in an AST file. -using MacroID = uint32_t; +using MacroID = uint64_t; /// A global ID number that refers to a macro in an AST file. -using GlobalMacroID = uint32_t; +using GlobalMacroID = uint64_t; /// A local to a module ID number that refers to a macro in an /// AST file. -using LocalMacroID = uint32_t; +using LocalMacroID = uint64_t; /// The number of predefined macro IDs. const unsigned int NUM_PREDEF_MACRO_IDS = 1; @@ -179,7 +179,7 @@ using CXXCtorInitializersID = uint32_t; /// An ID number that refers to an entity in the detailed /// preprocessing record. -using PreprocessedEntityID = uint32_t; +using PreprocessedEntityID = uint64_t; /// An ID number that refers to a submodule in a module file. 
using SubmoduleID = uint32_t; diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index af856a8097ab1..4d6fa585ebd45 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -800,14 +800,6 @@ class ASTReader /// files. llvm::DenseSet LoadedUndefs; - using GlobalMacroMapType = - ContinuousRangeMap; - - /// Mapping from global macro IDs to the module in which the - /// macro resides along with the offset that should be added to the - /// global macro ID to produce a local ID. - GlobalMacroMapType GlobalMacroMap; - /// A vector containing submodules that have already been loaded. /// /// This vector is indexed by the Submodule ID (-1). NULL submodule entries @@ -1655,8 +1647,7 @@ class ASTReader /// Returns the first preprocessed entity ID that begins or ends after /// \arg Loc. - serialization::PreprocessedEntityID - findPreprocessedEntity(SourceLocation Loc, bool EndsAfter) const; + unsigned findPreprocessedEntity(SourceLocation Loc, bool EndsAfter) const; /// Find the next module that contains entities and return the ID /// of the first entry. @@ -1664,9 +1655,8 @@ class ASTReader /// \param SLocMapI points at a chunk of a module that contains no /// preprocessed entities or the entities it contains are not the /// ones we are looking for. - serialization::PreprocessedEntityID - findNextPreprocessedEntity( - GlobalSLocOffsetMapType::const_iterator SLocMapI) const; + unsigned findNextPreprocessedEntity( + GlobalSLocOffsetMapType::const_iterator SLocMapI) const; /// Returns (ModuleFile, Local index) pair for \p GlobalIndex of a /// preprocessed entity. @@ -1748,6 +1738,14 @@ class ASTReader std::pair translateIdentifierIDToIndex(serialization::IdentifierID ID) const; + /// Translate an \param MacroID ID to the index of MacrosLoaded + /// array and the corresponding module file. 
+ std::pair + translateMacroIDToIndex(serialization::MacroID ID) const; + + unsigned translatePreprocessedEntityIDToIndex( + serialization::PreprocessedEntityID ID) const; + /// Translate an \param TypeID ID to the index of TypesLoaded /// array and the corresponding module file. std::pair @@ -2163,6 +2161,14 @@ class ASTReader LocalDeclID mapGlobalIDToModuleFileGlobalID(ModuleFile &M, GlobalDeclID GlobalID); + /// Reads a macro ID from the given position in a record in the + /// given module. + /// + /// \returns The macro ID read from the record, adjusted to a global + /// macro ID. + serialization::MacroID + ReadMacroID(ModuleFile &F, const RecordDataImpl &Record, unsigned &Idx); + /// Reads a declaration ID from the given position in a record in the /// given module. /// @@ -2388,7 +2394,8 @@ class ASTReader /// Retrieve the global macro ID corresponding to the given local /// ID within the given module file. - serialization::MacroID getGlobalMacroID(ModuleFile &M, unsigned LocalID); + serialization::MacroID getGlobalMacroID(ModuleFile &M, + serialization::MacroID LocalID); /// Read the source location entry with index ID. bool ReadSLocEntry(int ID) override; @@ -2572,8 +2579,8 @@ class ASTReader /// Determine the global preprocessed entity ID that corresponds to /// the given local ID within the given module. - serialization::PreprocessedEntityID - getGlobalPreprocessedEntityID(ModuleFile &M, unsigned LocalID) const; + serialization::PreprocessedEntityID getGlobalPreprocessedEntityID( + ModuleFile &M, serialization::PreprocessedEntityID LocalID) const; /// Add a macro to deserialize its macro directive history. 
/// diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 28c3e55864057..c77c98dffc39f 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -782,6 +782,10 @@ class ASTWriter : public ASTDeserializationListener, void AddLookupOffsets(const LookupBlockOffsets &Offsets, RecordDataImpl &Record); + /// Emit a reference to a macro. + void AddMacroRef(MacroInfo *MI, const IdentifierInfo *Name, + RecordDataImpl &Record); + /// Emit a reference to a declaration. void AddDeclRef(const Decl *D, RecordDataImpl &Record); // Emit a reference to a declaration if the declaration was emitted. diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index f20cb2f9f35ae..783e2ba7a1f94 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -353,9 +353,6 @@ class ModuleFile { /// Base macro ID for macros local to this module. serialization::MacroID BaseMacroID = 0; - /// Remapping table for macro IDs in this module. - ContinuousRangeMap MacroRemap; - /// The offset of the start of the set of defined macros. uint64_t MacroStartOffset = 0; @@ -372,9 +369,6 @@ class ModuleFile { /// this module. serialization::PreprocessedEntityID BasePreprocessedEntityID = 0; - /// Remapping table for preprocessed entity IDs in this module. 
- ContinuousRangeMap PreprocessedEntityRemap; - const PPEntityOffset *PreprocessedEntityOffsets = nullptr; unsigned NumPreprocessedEntities = 0; diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp index ea51a75324e06..0f2eaa94a5987 100644 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -34,6 +34,8 @@ Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { // TODO: Mark this method as const once we remove the call to getOrCreate. OriginID OriginManager::get(const Expr &E) { + if (auto *ParenIgnored = E.IgnoreParens(); ParenIgnored != &E) + return get(*ParenIgnored); auto It = ExprToOriginID.find(&E); if (It != ExprToOriginID.end()) return It->second; diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h index a67cbad7033a3..c8ce530a7b0d3 100644 --- a/clang/lib/CIR/CodeGen/Address.h +++ b/clang/lib/CIR/CodeGen/Address.h @@ -16,9 +16,11 @@ #include "mlir/IR/Value.h" #include "clang/AST/CharUnits.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/Support/Casting.h" namespace clang::CIRGen { @@ -114,6 +116,11 @@ class Address { return elementType; } + cir::TargetAddressSpaceAttr getAddressSpace() const { + auto ptrTy = mlir::cast(getType()); + return ptrTy.getAddrSpace(); + } + clang::CharUnits getAlignment() const { return alignment; } /// Get the operation which defines this address. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 4e6a5ee7ee210..eec4d10bb49b8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "CIRGenCall.h" -#include "CIRGenConstantEmitter.h" #include "CIRGenFunction.h" #include "CIRGenModule.h" #include "CIRGenValue.h" @@ -22,6 +21,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/GlobalDecl.h" #include "clang/Basic/Builtins.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/Support/ErrorHandling.h" @@ -193,11 +193,16 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, // default (e.g. in C / C++ auto vars are in the generic address space). At // the AST level this is handled within CreateTempAlloca et al., but for the // builtin / dynamic alloca we have to handle it here. - assert(!cir::MissingFeatures::addressSpace()); + + if (!cir::isMatchingAddressSpace( + getCIRAllocaAddressSpace(), + e->getType()->getPointeeType().getAddressSpace())) { + cgm.errorNYI(e->getSourceRange(), "Non-default address space for alloca"); + } // Bitcast the alloca to the expected type. 
- return RValue::get( - builder.createBitcast(allocaAddr, builder.getVoidPtrTy())); + return RValue::get(builder.createBitcast( + allocaAddr, builder.getVoidPtrTy(getCIRAllocaAddressSpace()))); } case Builtin::BIcos: diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 9bb76894c13f1..c55fcabef0b3f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -22,7 +22,11 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/Basic/AddressSpaces.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include @@ -1205,7 +1209,6 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) { case CK_AtomicToNonAtomic: case CK_ToUnion: case CK_BaseToDerived: - case CK_AddressSpaceConversion: case CK_ObjCObjectLValueCast: case CK_VectorSplat: case CK_ConstructorConversion: @@ -1219,7 +1222,27 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) { return {}; } + case CK_AddressSpaceConversion: { + LValue lv = emitLValue(e->getSubExpr()); + QualType destTy = getContext().getPointerType(e->getType()); + + clang::LangAS srcLangAS = e->getSubExpr()->getType().getAddressSpace(); + cir::TargetAddressSpaceAttr srcAS; + if (clang::isTargetAddressSpace(srcLangAS)) + srcAS = cir::toCIRTargetAddressSpace(getMLIRContext(), srcLangAS); + else + cgm.errorNYI( + e->getSourceRange(), + "emitCastLValue: address space conversion from unknown address " + "space"); + mlir::Value v = getTargetHooks().performAddrSpaceCast( + *this, lv.getPointer(), srcAS, convertType(destTy)); + + return makeAddrLValue(Address(v, convertTypeForMem(e->getType()), + lv.getAddress().getAlignment()), + e->getType(), lv.getBaseInfo()); + } case CK_LValueBitCast: { // This must be a reinterpret_cast (or c-style equivalent). 
const auto *ce = cast(e); @@ -2233,6 +2256,8 @@ Address CIRGenFunction::createTempAllocaWithoutCast( /// This creates a alloca and inserts it into the entry block. The alloca is /// casted to default address space if necessary. +// TODO(cir): Implement address space casting to match classic codegen's +// CreateTempAlloca behavior with DestLangAS parameter Address CIRGenFunction::createTempAlloca(mlir::Type ty, CharUnits align, mlir::Location loc, const Twine &name, mlir::Value arraySize, @@ -2247,7 +2272,21 @@ Address CIRGenFunction::createTempAlloca(mlir::Type ty, CharUnits align, // be different from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. - assert(!cir::MissingFeatures::addressSpace()); + + LangAS allocaAS = alloca.getAddressSpace() + ? clang::getLangASFromTargetAS( + alloca.getAddressSpace().getValue().getUInt()) + : clang::LangAS::Default; + LangAS dstTyAS = clang::LangAS::Default; + if (getCIRAllocaAddressSpace()) { + dstTyAS = clang::getLangASFromTargetAS( + getCIRAllocaAddressSpace().getValue().getUInt()); + } + + if (dstTyAS != allocaAS) { + getTargetHooks().performAddrSpaceCast(*this, v, getCIRAllocaAddressSpace(), + builder.getPointerTo(ty, dstTyAS)); + } return Address(v, ty, align); } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 4461875fcf678..1c4f51c11dc5e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -15,6 +15,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/StmtVisitor.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "mlir/IR/Location.h" @@ -91,6 +92,7 @@ class ScalarExprEmitter : public StmtVisitor { //===--------------------------------------------------------------------===// // Utilities 
//===--------------------------------------------------------------------===// + mlir::Type convertType(QualType ty) { return cgf.convertType(ty); } mlir::Value emitComplexToScalarConversion(mlir::Location loc, mlir::Value value, CastKind kind, @@ -1888,6 +1890,35 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) { return cgf.getBuilder().createBitcast(cgf.getLoc(subExpr->getSourceRange()), src, dstTy); } + case CK_AddressSpaceConversion: { + Expr::EvalResult result; + if (subExpr->EvaluateAsRValue(result, cgf.getContext()) && + result.Val.isNullPointer()) { + // If e has side effect, it is emitted even if its final result is a + // null pointer. In that case, a DCE pass should be able to + // eliminate the useless instructions emitted during translating E. + if (result.HasSideEffects) + Visit(subExpr); + return cgf.cgm.emitNullConstant(destTy, + cgf.getLoc(subExpr->getExprLoc())); + } + + clang::QualType srcTy = subExpr->IgnoreImpCasts()->getType(); + if (srcTy->isPointerType() || srcTy->isReferenceType()) + srcTy = srcTy->getPointeeType(); + + clang::LangAS srcLangAS = srcTy.getAddressSpace(); + cir::TargetAddressSpaceAttr subExprAS; + if (clang::isTargetAddressSpace(srcLangAS)) + subExprAS = cir::toCIRTargetAddressSpace(cgf.getMLIRContext(), srcLangAS); + else + cgf.cgm.errorNYI(subExpr->getSourceRange(), + "non-target address space conversion"); + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. 
+ return cgf.cgm.getTargetCIRGenInfo().performAddrSpaceCast( + cgf, Visit(subExpr), subExprAS, convertType(destTy)); + } case CK_AtomicToNonAtomic: { cgf.getCIRGenModule().errorNYI(subExpr->getSourceRange(), diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index b71a28c54dbef..4f5948b6e4467 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -222,6 +222,10 @@ class CIRGenFunction : public CIRGenTypeCache { const TargetInfo &getTarget() const { return cgm.getTarget(); } mlir::MLIRContext &getMLIRContext() { return cgm.getMLIRContext(); } + const TargetCIRGenInfo &getTargetHooks() const { + return cgm.getTargetCIRGenInfo(); + } + // --------------------- // Opaque value handling // --------------------- diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 9f9b2db4771df..c1f2581eb96e3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1424,6 +1424,23 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s, return builder.getGlobalViewAttr(ptrTy, gv); } +// TODO(cir): this could be a common AST helper for both CIR and LLVM codegen. +LangAS CIRGenModule::getLangTempAllocaAddressSpace() const { + if (getLangOpts().OpenCL) + return LangAS::opencl_private; + + // For temporaries inside functions, CUDA treats them as normal variables. + // LangAS::cuda_device, on the other hand, is reserved for those variables + // explicitly marked with __device__. 
+ if (getLangOpts().CUDAIsDevice) + return LangAS::Default; + + if (getLangOpts().SYCLIsDevice || + (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice)) + errorNYI("SYCL or OpenMP temp address space"); + return LangAS::Default; +} + void CIRGenModule::emitExplicitCastExprType(const ExplicitCastExpr *e, CIRGenFunction *cgf) { if (cgf && e->getType()->isVariablyModifiedType()) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 186913d1bac9d..dc28d9e8e9d33 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -297,6 +297,12 @@ class CIRGenModule : public CIRGenTypeCache { getAddrOfConstantStringFromLiteral(const StringLiteral *s, llvm::StringRef name = ".str"); + /// Returns the address space for temporary allocations in the language. This + /// ensures that the allocated variable's address space matches the + /// expectations of the AST, rather than using the target's allocation address + /// space, which may lead to type mismatches in other parts of the IR. + LangAS getLangTempAllocaAddressSpace() const; + /// Set attributes which are common to any form of a global definition (alias, /// Objective-C method, function, global variable). 
/// diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index 03618d4a8a8a6..efc2c6c0ba500 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -404,7 +404,7 @@ mlir::Type CIRGenTypes::convertType(QualType type) { const ReferenceType *refTy = cast(ty); QualType elemTy = refTy->getPointeeType(); auto pointeeType = convertTypeForMem(elemTy); - resultType = builder.getPointerTo(pointeeType); + resultType = builder.getPointerTo(pointeeType, elemTy.getAddressSpace()); assert(resultType && "Cannot get pointer type?"); break; } diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 62a8c59abe604..377c532e492d9 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -1,5 +1,8 @@ #include "TargetInfo.h" #include "ABIInfo.h" +#include "CIRGenFunction.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" using namespace clang; using namespace clang::CIRGen; @@ -68,3 +71,14 @@ bool TargetCIRGenInfo::isNoProtoCallVariadic( // For everything else, we just prefer false unless we opt out. return false; } + +mlir::Value TargetCIRGenInfo::performAddrSpaceCast( + CIRGenFunction &cgf, mlir::Value v, cir::TargetAddressSpaceAttr srcAddr, + mlir::Type destTy, bool isNonNull) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + if (cir::GlobalOp globalOp = v.getDefiningOp()) + cgf.cgm.errorNYI("Global op addrspace cast"); + // Try to preserve the source's name to make IR more readable. 
+ return cgf.getBuilder().createAddrSpaceCast(v, destTy); +} diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index dbb0312c76040..72682641a460b 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -17,6 +17,7 @@ #include "ABIInfo.h" #include "CIRGenTypes.h" #include "clang/Basic/AddressSpaces.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include #include @@ -33,6 +34,8 @@ bool isEmptyFieldForLayout(const ASTContext &context, const FieldDecl *fd); /// if the [[no_unique_address]] attribute would have made them empty. bool isEmptyRecordForLayout(const ASTContext &context, QualType t); +class CIRGenFunction; + class TargetCIRGenInfo { std::unique_ptr info; @@ -48,6 +51,15 @@ class TargetCIRGenInfo { virtual cir::TargetAddressSpaceAttr getCIRAllocaAddressSpace() const { return {}; } + /// Perform address space cast of an expression of pointer type. + /// \param v is the value to be cast to another address space. + /// \param destTy is the destination pointer type. + /// \param srcAddr is the address space of \p v. + /// \param isNonNull is the flag indicating \p v is known to be non-null. + virtual mlir::Value performAddrSpaceCast(CIRGenFunction &cgf, mlir::Value v, + cir::TargetAddressSpaceAttr srcAddr, + mlir::Type destTy, + bool isNonNull = false) const; /// Determine whether a call to an unprototyped functions under /// the given calling convention should use the variadic diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 7ba03ce40140c..9ac5efe0e41c7 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -383,6 +383,16 @@ LogicalResult cir::CastOp::verify() { mlir::Type resType = getType(); mlir::Type srcType = getSrc().getType(); + // Verify address space casts for pointer types: only the address_space + // cast kind may change the address space of a pointer.
+ auto srcPtrTy = mlir::dyn_cast(srcType); + auto resPtrTy = mlir::dyn_cast(resType); + if (srcPtrTy && resPtrTy && (getKind() != cir::CastKind::address_space)) + if (srcPtrTy.getAddrSpace() != resPtrTy.getAddrSpace()) { + return emitOpError() << "result type address space does not match the " + "address space of the operand"; + } + if (mlir::isa(srcType) && mlir::isa(resType)) { // Use the element type of the vector to verify the cast kind. (Except for diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index f7907c76c8ccb..bb87056048ec5 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -12,11 +12,16 @@ #include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/MLIRContext.h" +#include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypesDetails.h" #include "clang/CIR/MissingFeatures.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/TypeSwitch.h" //===----------------------------------------------------------------------===// @@ -807,6 +812,27 @@ mlir::LogicalResult cir::VectorType::verify( // TargetAddressSpace definitions //===----------------------------------------------------------------------===// +cir::TargetAddressSpaceAttr +cir::toCIRTargetAddressSpace(mlir::MLIRContext &context, clang::LangAS langAS) { + return cir::TargetAddressSpaceAttr::get( + &context, + IntegerAttr::get(&context, + llvm::APSInt(clang::toTargetAddressSpace(langAS)))); +} + +bool cir::isMatchingAddressSpace(cir::TargetAddressSpaceAttr cirAS, + clang::LangAS as) { + // If there is no CIR target attr, consider it "default" and only match + // when the AST address space is LangAS::Default. 
+ if (!cirAS) + return as == clang::LangAS::Default; + + if (!isTargetAddressSpace(as)) + return false; + + return cirAS.getValue().getUInt() == toTargetAddressSpace(as); +} + mlir::ParseResult parseTargetAddressSpace(mlir::AsmParser &p, cir::TargetAddressSpaceAttr &attr) { if (failed(p.parseKeyword("target_address_space"))) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0782dc1b585c3..88b7adb918b87 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -5409,6 +5409,86 @@ std::string CompilerInvocation::getModuleHash() const { return toString(llvm::APInt(64, Hash), 36, /*Signed=*/false); } +void CompilerInvocationBase::visitPathsImpl( + llvm::function_ref Predicate) { +#define RETURN_IF(PATH) \ + do { \ + if (Predicate(PATH)) \ + return; \ + } while (0) + +#define RETURN_IF_MANY(PATHS) \ + do { \ + if (llvm::any_of(PATHS, Predicate)) \ + return; \ + } while (0) + + auto &HeaderSearchOpts = *this->HSOpts; + // Header search paths. + RETURN_IF(HeaderSearchOpts.Sysroot); + for (auto &Entry : HeaderSearchOpts.UserEntries) + if (Entry.IgnoreSysRoot) + RETURN_IF(Entry.Path); + RETURN_IF(HeaderSearchOpts.ResourceDir); + RETURN_IF(HeaderSearchOpts.ModuleCachePath); + RETURN_IF(HeaderSearchOpts.ModuleUserBuildPath); + for (auto &[Name, File] : HeaderSearchOpts.PrebuiltModuleFiles) + RETURN_IF(File); + RETURN_IF_MANY(HeaderSearchOpts.PrebuiltModulePaths); + RETURN_IF_MANY(HeaderSearchOpts.VFSOverlayFiles); + + // Preprocessor options. + auto &PPOpts = *this->PPOpts; + RETURN_IF_MANY(PPOpts.MacroIncludes); + RETURN_IF_MANY(PPOpts.Includes); + RETURN_IF(PPOpts.ImplicitPCHInclude); + + // Frontend options. 
+ auto &FrontendOpts = *this->FrontendOpts; + for (auto &Input : FrontendOpts.Inputs) { + if (Input.isBuffer()) + continue; + + RETURN_IF(Input.File); + } + RETURN_IF(FrontendOpts.CodeCompletionAt.FileName); + RETURN_IF_MANY(FrontendOpts.ModuleMapFiles); + RETURN_IF_MANY(FrontendOpts.ModuleFiles); + RETURN_IF_MANY(FrontendOpts.ModulesEmbedFiles); + RETURN_IF_MANY(FrontendOpts.ASTMergeFiles); + RETURN_IF(FrontendOpts.OverrideRecordLayoutsFile); + RETURN_IF(FrontendOpts.StatsFile); + + // Filesystem options. + auto &FileSystemOpts = *this->FSOpts; + RETURN_IF(FileSystemOpts.WorkingDir); + + // Codegen options. + auto &CodeGenOpts = *this->CodeGenOpts; + RETURN_IF(CodeGenOpts.DebugCompilationDir); + RETURN_IF(CodeGenOpts.CoverageCompilationDir); + + // Sanitizer options. + RETURN_IF_MANY(LangOpts->NoSanitizeFiles); + + // Coverage mappings. + RETURN_IF(CodeGenOpts.ProfileInstrumentUsePath); + RETURN_IF(CodeGenOpts.SampleProfileFile); + RETURN_IF(CodeGenOpts.ProfileRemappingFile); + + // Dependency output options. + for (auto &ExtraDep : DependencyOutputOpts->ExtraDeps) + RETURN_IF(ExtraDep.first); +} + +void CompilerInvocationBase::visitPaths( + llvm::function_ref Callback) const { + // The const_cast here is OK, because visitPathsImpl() itself doesn't modify + // the invocation, and our callback takes immutable StringRefs. + return const_cast(this)->visitPathsImpl( + [&Callback](std::string &Path) { return Callback(StringRef(Path)); }); +} + void CompilerInvocationBase::generateCC1CommandLine( ArgumentConsumer Consumer) const { llvm::Triple T(getTargetOpts().Triple); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 823a94ad821e0..b0c7bae46f09e 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2228,9 +2228,10 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { // We have a macro definition. 
Register the association PreprocessedEntityID GlobalID = getGlobalPreprocessedEntityID(F, Record[NextIndex]); + unsigned Index = translatePreprocessedEntityIDToIndex(GlobalID); PreprocessingRecord &PPRec = *PP.getPreprocessingRecord(); PreprocessingRecord::PPEntityID PPID = - PPRec.getPPEntityID(GlobalID - 1, /*isLoaded=*/true); + PPRec.getPPEntityID(Index, /*isLoaded=*/true); MacroDefinitionRecord *PPDef = cast_or_null( PPRec.getPreprocessedEntity(PPID)); if (PPDef) @@ -2261,16 +2262,22 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { PreprocessedEntityID ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M, - unsigned LocalID) const { + PreprocessedEntityID LocalID) const { if (!M.ModuleOffsetMap.empty()) ReadModuleOffsetMap(M); - ContinuousRangeMap::const_iterator - I = M.PreprocessedEntityRemap.find(LocalID - NUM_PREDEF_PP_ENTITY_IDS); - assert(I != M.PreprocessedEntityRemap.end() - && "Invalid index into preprocessed entity index remap"); + unsigned ModuleFileIndex = LocalID >> 32; + LocalID &= llvm::maskTrailingOnes(32); + ModuleFile *MF = + ModuleFileIndex ? 
M.TransitiveImports[ModuleFileIndex - 1] : &M; + assert(MF && "malformed identifier ID encoding?"); - return LocalID + I->second; + if (!ModuleFileIndex) { + assert(LocalID >= NUM_PREDEF_PP_ENTITY_IDS); + LocalID -= NUM_PREDEF_PP_ENTITY_IDS; + } + + return (static_cast(MF->Index + 1) << 32) | LocalID; } OptionalFileEntryRef @@ -2547,6 +2554,13 @@ void ASTReader::markIdentifierUpToDate(const IdentifierInfo *II) { IdentifierGeneration[II] = getGeneration(); } +MacroID ASTReader::ReadMacroID(ModuleFile &F, const RecordDataImpl &Record, + unsigned &Idx) { + uint64_t ModuleFileIndex = Record[Idx++] << 32; + uint64_t LocalIndex = Record[Idx++]; + return getGlobalMacroID(F, (ModuleFileIndex | LocalIndex)); +} + void ASTReader::resolvePendingMacro(IdentifierInfo *II, const PendingMacroInfo &PMInfo) { ModuleFile &M = *PMInfo.M; @@ -2597,9 +2611,10 @@ void ASTReader::resolvePendingMacro(IdentifierInfo *II, case PP_MODULE_MACRO: { ModuleMacros.push_back(ModuleMacroRecord()); auto &Info = ModuleMacros.back(); - Info.SubModID = getGlobalSubmoduleID(M, Record[0]); - Info.MI = getMacro(getGlobalMacroID(M, Record[1])); - for (int I = 2, N = Record.size(); I != N; ++I) + unsigned Idx = 0; + Info.SubModID = getGlobalSubmoduleID(M, Record[Idx++]); + Info.MI = getMacro(ReadMacroID(M, Record, Idx)); + for (int I = Idx, N = Record.size(); I != N; ++I) Info.Overrides.push_back(getGlobalSubmoduleID(M, Record[I])); continue; } @@ -4111,8 +4126,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, assert(Blob.size() % sizeof(PPEntityOffset) == 0); F.NumPreprocessedEntities = Blob.size() / sizeof(PPEntityOffset); - unsigned LocalBasePreprocessedEntityID = Record[0]; - unsigned StartingID; if (!PP.getPreprocessingRecord()) PP.createPreprocessingRecord(); @@ -4127,12 +4140,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, // Introduce the global -> local mapping for preprocessed entities in // this module. 
GlobalPreprocessedEntityMap.insert(std::make_pair(StartingID, &F)); - - // Introduce the local -> global mapping for preprocessed entities in - // this module. - F.PreprocessedEntityRemap.insertOrReplace( - std::make_pair(LocalBasePreprocessedEntityID, - F.BasePreprocessedEntityID - LocalBasePreprocessedEntityID)); } break; @@ -4343,21 +4350,11 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, "duplicate MACRO_OFFSET record in AST file"); F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; - unsigned LocalBaseMacroID = Record[1]; - F.MacroOffsetsBase = Record[2] + F.ASTBlockStartOffset; + F.MacroOffsetsBase = Record[1] + F.ASTBlockStartOffset; F.BaseMacroID = getTotalNumMacros(); - if (F.LocalNumMacros > 0) { - // Introduce the global -> local mapping for macros within this module. - GlobalMacroMap.insert(std::make_pair(getTotalNumMacros() + 1, &F)); - - // Introduce the local -> global mapping for macros within this module. - F.MacroRemap.insertOrReplace( - std::make_pair(LocalBaseMacroID, - F.BaseMacroID - LocalBaseMacroID)); - + if (F.LocalNumMacros > 0) MacrosLoaded.resize(MacrosLoaded.size() + F.LocalNumMacros); - } break; } @@ -4463,8 +4460,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { F.ModuleOffsetMap = StringRef(); using RemapBuilder = ContinuousRangeMap::Builder; - RemapBuilder MacroRemap(F.MacroRemap); - RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); RemapBuilder SubmoduleRemap(F.SubmoduleRemap); RemapBuilder SelectorRemap(F.SelectorRemap); @@ -4494,10 +4489,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { ImportedModuleVector.push_back(OM); - uint32_t MacroIDOffset = - endian::readNext(Data); - uint32_t PreprocessedEntityIDOffset = - endian::readNext(Data); uint32_t SubmoduleIDOffset = endian::readNext(Data); uint32_t SelectorIDOffset = @@ -4511,9 +4502,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { static_cast(BaseOffset - Offset))); }; - 
mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap); - mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID, - PreprocessedEntityRemap); mapOffset(SubmoduleIDOffset, OM->BaseSubmoduleID, SubmoduleRemap); mapOffset(SelectorIDOffset, OM->BaseSelectorID, SelectorRemap); } @@ -6725,11 +6713,23 @@ SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) { return Range; } +unsigned +ASTReader::translatePreprocessedEntityIDToIndex(PreprocessedEntityID ID) const { + unsigned ModuleFileIndex = ID >> 32; + assert(ModuleFileIndex && "not translating loaded MacroID?"); + assert(getModuleManager().size() > ModuleFileIndex - 1); + ModuleFile &MF = getModuleManager()[ModuleFileIndex - 1]; + + ID &= llvm::maskTrailingOnes(32); + return MF.BasePreprocessedEntityID + ID; +} + PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { - PreprocessedEntityID PPID = Index+1; std::pair PPInfo = getModulePreprocessedEntity(Index); ModuleFile &M = *PPInfo.first; unsigned LocalIndex = PPInfo.second; + PreprocessedEntityID PPID = + (static_cast(M.Index + 1) << 32) | LocalIndex; const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex]; if (!PP.getPreprocessingRecord()) { @@ -6777,8 +6777,9 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { else { PreprocessedEntityID GlobalID = getGlobalPreprocessedEntityID(M, Record[1]); - Def = cast( - PPRec.getLoadedPreprocessedEntity(GlobalID - 1)); + unsigned Index = translatePreprocessedEntityIDToIndex(GlobalID); + Def = + cast(PPRec.getLoadedPreprocessedEntity(Index)); } MacroExpansion *ME; @@ -6831,8 +6832,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { /// \param SLocMapI points at a chunk of a module that contains no /// preprocessed entities or the entities it contains are not the ones we are /// looking for. 
-PreprocessedEntityID ASTReader::findNextPreprocessedEntity( - GlobalSLocOffsetMapType::const_iterator SLocMapI) const { +unsigned ASTReader::findNextPreprocessedEntity( + GlobalSLocOffsetMapType::const_iterator SLocMapI) const { ++SLocMapI; for (GlobalSLocOffsetMapType::const_iterator EndI = GlobalSLocOffsetMap.end(); SLocMapI != EndI; ++SLocMapI) { @@ -6875,8 +6876,8 @@ struct PPEntityComp { } // namespace -PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc, - bool EndsAfter) const { +unsigned ASTReader::findPreprocessedEntity(SourceLocation Loc, + bool EndsAfter) const { if (SourceMgr.isLocalSourceLocation(Loc)) return getTotalNumPreprocessedEntities(); @@ -6936,9 +6937,8 @@ std::pair return std::make_pair(0,0); assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin())); - PreprocessedEntityID BeginID = - findPreprocessedEntity(Range.getBegin(), false); - PreprocessedEntityID EndID = findPreprocessedEntity(Range.getEnd(), true); + unsigned BeginID = findPreprocessedEntity(Range.getBegin(), false); + unsigned EndID = findPreprocessedEntity(Range.getEnd(), true); return std::make_pair(BeginID, EndID); } @@ -8963,7 +8963,6 @@ LLVM_DUMP_METHOD void ASTReader::dump() { llvm::errs() << "*** PCH/ModuleFile Remappings:\n"; dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap); dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap); - dumpModuleIDMap("Global macro map", GlobalMacroMap); dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap); dumpModuleIDMap("Global selector map", GlobalSelectorMap); dumpModuleIDMap("Global preprocessed entity map", @@ -9746,6 +9745,21 @@ IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, uint64_t LocalID) { return ((IdentifierID)(MF->Index + 1) << 32) | LocalID; } +std::pair +ASTReader::translateMacroIDToIndex(MacroID ID) const { + if (ID == 0) + return {nullptr, 0}; + + unsigned ModuleFileIndex = ID >> 32; + assert(ModuleFileIndex && "not translating 
loaded MacroID?"); + assert(getModuleManager().size() > ModuleFileIndex - 1); + ModuleFile &MF = getModuleManager()[ModuleFileIndex - 1]; + + unsigned LocalID = ID & llvm::maskTrailingOnes(32); + assert(LocalID < MF.LocalNumMacros); + return {&MF, MF.BaseMacroID + LocalID}; +} + MacroInfo *ASTReader::getMacro(MacroID ID) { if (ID == 0) return nullptr; @@ -9755,36 +9769,40 @@ MacroInfo *ASTReader::getMacro(MacroID ID) { return nullptr; } - ID -= NUM_PREDEF_MACRO_IDS; - if (!MacrosLoaded[ID]) { - GlobalMacroMapType::iterator I - = GlobalMacroMap.find(ID + NUM_PREDEF_MACRO_IDS); - assert(I != GlobalMacroMap.end() && "Corrupted global macro map"); - ModuleFile *M = I->second; - unsigned Index = ID - M->BaseMacroID; - MacrosLoaded[ID] = - ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]); + auto [M, Index] = translateMacroIDToIndex(ID); + if (!MacrosLoaded[Index]) { + assert(M != nullptr && "Untranslated Macro ID?"); + assert(Index >= M->BaseMacroID); + unsigned LocalIndex = Index - M->BaseMacroID; + uint64_t DataOffset = M->MacroOffsetsBase + M->MacroOffsets[LocalIndex]; + MacrosLoaded[Index] = ReadMacroRecord(*M, DataOffset); if (DeserializationListener) - DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS, - MacrosLoaded[ID]); + DeserializationListener->MacroRead(ID, MacrosLoaded[Index]); } - return MacrosLoaded[ID]; + return MacrosLoaded[Index]; } -MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) { +MacroID ASTReader::getGlobalMacroID(ModuleFile &M, MacroID LocalID) { if (LocalID < NUM_PREDEF_MACRO_IDS) return LocalID; if (!M.ModuleOffsetMap.empty()) ReadModuleOffsetMap(M); - ContinuousRangeMap::iterator I - = M.MacroRemap.find(LocalID - NUM_PREDEF_MACRO_IDS); - assert(I != M.MacroRemap.end() && "Invalid index into macro index remap"); + unsigned ModuleFileIndex = LocalID >> 32; + LocalID &= llvm::maskTrailingOnes(32); + ModuleFile *MF = + ModuleFileIndex ? 
M.TransitiveImports[ModuleFileIndex - 1] : &M; + assert(MF && "malformed identifier ID encoding?"); - return LocalID + I->second; + if (!ModuleFileIndex) { + assert(LocalID >= NUM_PREDEF_MACRO_IDS); + LocalID -= NUM_PREDEF_MACRO_IDS; + } + + return (static_cast(MF->Index + 1) << 32) | LocalID; } serialization::SubmoduleID diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index e4618d60a8acb..e8c0d3f2b4ee9 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2691,7 +2691,7 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { Record.push_back(VisMD->isPublic()); } ModuleMacroRecord.push_back(getSubmoduleID(WritingModule)); - ModuleMacroRecord.push_back(getMacroRef(MD->getMacroInfo(), Name)); + AddMacroRef(MD->getMacroInfo(), Name, ModuleMacroRecord); Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord); ModuleMacroRecord.clear(); EmittedModuleMacros = true; @@ -2720,7 +2720,7 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // Emit a record indicating this submodule exports this macro. 
ModuleMacroRecord.push_back(getSubmoduleID(Macro->getOwningModule())); - ModuleMacroRecord.push_back(getMacroRef(Macro->getMacroInfo(), Name)); + AddMacroRef(Macro->getMacroInfo(), Name, ModuleMacroRecord); for (auto *M : Macro->overrides()) ModuleMacroRecord.push_back(getSubmoduleID(M->getOwningModule())); @@ -2819,14 +2819,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), - FirstMacroID - NUM_PREDEF_MACRO_IDS, MacroOffsetsBase - ASTBlockStartOffset}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } @@ -2859,9 +2857,7 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, InclusionAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); } - unsigned FirstPreprocessorEntityID - = (Chain ? 
PPRec.getNumLoadedPreprocessedEntities() : 0) - + NUM_PREDEF_PP_ENTITY_IDS; + unsigned FirstPreprocessorEntityID = NUM_PREDEF_PP_ENTITY_IDS; unsigned NextPreprocessorEntityID = FirstPreprocessorEntityID; RecordData Record; for (PreprocessingRecord::iterator E = PPRec.local_begin(), @@ -2925,13 +2921,10 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(PPD_ENTITIES_OFFSETS)); - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first pp entity Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - RecordData::value_type Record[] = {PPD_ENTITIES_OFFSETS, - FirstPreprocessorEntityID - - NUM_PREDEF_PP_ENTITY_IDS}; + RecordData::value_type Record[] = {PPD_ENTITIES_OFFSETS}; Stream.EmitRecordWithBlob(PPEOffsetAbbrev, Record, bytes(PreprocessedEntityOffsets)); } @@ -6100,9 +6093,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema *SemaPtr, StringRef isysroot, // These values should be unique within a chain, since they will be read // as keys into ContinuousRangeMaps. 
- writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros); - writeBaseIDOrNone(M.BasePreprocessedEntityID, - M.NumPreprocessedEntities); writeBaseIDOrNone(M.BaseSubmoduleID, M.LocalNumSubmodules); writeBaseIDOrNone(M.BaseSelectorID, M.LocalNumSelectors); } @@ -6903,6 +6893,13 @@ void ASTWriter::AddLookupOffsets(const LookupBlockOffsets &Offsets, Record.push_back(Offsets.TULocalOffset); } +void ASTWriter::AddMacroRef(MacroInfo *MI, const IdentifierInfo *Name, + RecordDataImpl &Record) { + MacroID MacroRef = getMacroRef(MI, Name); + Record.push_back(MacroRef >> 32); + Record.push_back(MacroRef & llvm::maskTrailingOnes(32)); +} + void ASTWriter::AddEmittedDeclRef(const Decl *D, RecordDataImpl &Record) { if (!wasDeclEmitted(D)) return; @@ -7383,12 +7380,8 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) { Chain = Reader; - // Note, this will get called multiple times, once one the reader starts up - // and again each time it's done reading a PCH or module. - FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros(); FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules(); FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors(); - NextMacroID = FirstMacroID; NextSelectorID = FirstSelectorID; NextSubmoduleID = FirstSubmoduleID; } @@ -7416,6 +7409,14 @@ void ASTWriter::IdentifierRead(IdentifierID ID, IdentifierInfo *II) { void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) { // Always keep the highest ID. See \p TypeRead() for more information. MacroID &StoredID = MacroIDs[MI]; + unsigned OriginalModuleFileIndex = StoredID >> 32; + + // Always keep the local macro ID. See \p TypeRead() for more information. + if (OriginalModuleFileIndex == 0 && StoredID) + return; + + // Otherwise, keep the highest ID, since a module file that comes later has + // a higher module file index.
if (ID > StoredID) StoredID = ID; } diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp index 4858cdbda5545..7f631eafcaf35 100644 --- a/clang/lib/Serialization/ModuleFile.cpp +++ b/clang/lib/Serialization/ModuleFile.cpp @@ -65,7 +65,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { llvm::errs() << " Base macro ID: " << BaseMacroID << '\n' << " Number of macros: " << LocalNumMacros << '\n'; - dumpLocalRemap("Macro ID local -> global map", MacroRemap); llvm::errs() << " Base submodule ID: " << BaseSubmoduleID << '\n' << " Number of submodules: " << LocalNumSubmodules << '\n'; @@ -79,8 +78,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { << '\n' << " Number of preprocessed entities: " << NumPreprocessedEntities << '\n'; - dumpLocalRemap("Preprocessed entity ID local -> global map", - PreprocessedEntityRemap); llvm::errs() << " Base type index: " << BaseTypeIndex << '\n' << " Number of types: " << LocalNumTypes << '\n'; diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index e07a208748b77..0022597348a82 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -471,82 +471,13 @@ static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) { // Check if the command line input uses relative paths. // It is not safe to ignore the current working directory if any of the // command line inputs use relative paths. -#define IF_RELATIVE_RETURN_FALSE(PATH) \ - do { \ - if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH)) \ - return false; \ - } while (0) - -#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS) \ - do { \ - if (llvm::any_of(PATHS, [](const auto &P) { \ - return !P.empty() && !llvm::sys::path::is_absolute(P); \ - })) \ - return false; \ - } while (0) - - // Header search paths. 
- const auto &HeaderSearchOpts = CI.getHeaderSearchOpts(); - IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot); - for (auto &Entry : HeaderSearchOpts.UserEntries) - if (Entry.IgnoreSysRoot) - IF_RELATIVE_RETURN_FALSE(Entry.Path); - IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir); - IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath); - IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath); - for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(), - E = HeaderSearchOpts.PrebuiltModuleFiles.end(); - I != E;) { - auto Current = I++; - IF_RELATIVE_RETURN_FALSE(Current->second); - } - IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths); - IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles); - - // Preprocessor options. - const auto &PPOpts = CI.getPreprocessorOpts(); - IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes); - IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes); - IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude); - - // Frontend options. - const auto &FrontendOpts = CI.getFrontendOpts(); - for (const FrontendInputFile &Input : FrontendOpts.Inputs) { - if (Input.isBuffer()) - continue; // FIXME: Can this happen when parsing command-line? - - IF_RELATIVE_RETURN_FALSE(Input.getFile()); - } - IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName); - IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles); - IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles); - IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles); - IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles); - IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile); - IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile); - - // Filesystem options. - const auto &FileSystemOpts = CI.getFileSystemOpts(); - IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir); - - // Codegen options. 
- const auto &CodeGenOpts = CI.getCodeGenOpts(); - IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir); - IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir); - - // Sanitizer options. - IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles); - - // Coverage mappings. - IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath); - IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile); - IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile); - - // Dependency output options. - for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps) - IF_RELATIVE_RETURN_FALSE(ExtraDep.first); - - return true; + bool AnyRelative = false; + CI.visitPaths([&](StringRef Path) { + assert(!AnyRelative && "Continuing path visitation despite returning true"); + AnyRelative |= !Path.empty() && !llvm::sys::path::is_absolute(Path); + return AnyRelative; + }); + return !AnyRelative; } static std::string getModuleContextHash(const ModuleDeps &MD, diff --git a/clang/test/CIR/CodeGen/address-space-conversion.cpp b/clang/test/CIR/CodeGen/address-space-conversion.cpp new file mode 100644 index 0000000000000..ca026be60ee71 --- /dev/null +++ b/clang/test/CIR/CodeGen/address-space-conversion.cpp @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +using pi1_t = int __attribute__((address_space(1))) *; +using pi2_t = int __attribute__((address_space(2))) *; + +using ri1_t = int __attribute__((address_space(1))) &; +using ri2_t = int __attribute__((address_space(2))) &; + +// CIR: cir.func dso_local @{{.*test_ptr.*}} +// LLVM: define dso_local 
void @{{.*test_ptr.*}} +// OGCG: define dso_local void @{{.*test_ptr.*}} +void test_ptr() { + pi1_t ptr1; + pi2_t ptr2 = (pi2_t)ptr1; + // CIR: %[[#PTR1:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[#CAST:]] = cir.cast address_space %[[#PTR1]] : !cir.ptr -> !cir.ptr + // CIR-NEXT: cir.store{{.*}} %[[#CAST]], %{{[0-9]+}} : !cir.ptr, !cir.ptr> + + // LLVM: %[[#PTR1:]] = load ptr addrspace(1), ptr %{{.*}} + // LLVM-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#PTR1]] to ptr addrspace(2) + // LLVM-NEXT: store ptr addrspace(2) %[[#CAST]], ptr %{{.*}} + + // OGCG: %{{.*}} = load ptr addrspace(1), ptr %{{.*}} + // OGCG-NEXT: %{{.*}} = addrspacecast ptr addrspace(1) %{{.*}} to ptr addrspace(2) + // OGCG-NEXT: store ptr addrspace(2) %{{.*}}, ptr %{{.*}} +} + +// CIR: cir.func dso_local @{{.*test_ref.*}} +// LLVM: define dso_local void @{{.*test_ref.*}} +// OGCG: define dso_local void @{{.*test_ref.*}} +void test_ref() { + pi1_t ptr; + ri1_t ref1 = *ptr; + ri2_t ref2 = (ri2_t)ref1; + // CIR: %[[#DEREF:]] = cir.load deref{{.*}} %{{[0-9]+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: cir.store{{.*}} %[[#DEREF]], %{{[0-9]+}} : !cir.ptr, !cir.ptr> + // CIR-NEXT: %[[#REF1:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[#CAST:]] = cir.cast address_space %[[#REF1]] : !cir.ptr -> !cir.ptr + // CIR-NEXT: cir.store{{.*}} %[[#CAST]], %{{[0-9]+}} : !cir.ptr, !cir.ptr> + + // LLVM: %[[#DEREF:]] = load ptr addrspace(1), ptr %{{.*}} + // LLVM-NEXT: store ptr addrspace(1) %[[#DEREF]], ptr %{{.*}} + // LLVM-NEXT: %[[#REF1:]] = load ptr addrspace(1), ptr %{{.*}} + // LLVM-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#REF1]] to ptr addrspace(2) + // LLVM-NEXT: store ptr addrspace(2) %[[#CAST]], ptr %{{.*}} + + // OGCG: %{{.*}} = load ptr addrspace(1), ptr %{{.*}} + // OGCG-NEXT: store ptr addrspace(1) %{{.*}}, ptr %{{.*}} + // OGCG-NEXT: %{{.*}} = load ptr addrspace(1), ptr %{{.*}} + // OGCG-NEXT: %{{.*}} = addrspacecast ptr addrspace(1) 
%{{.*}} to ptr addrspace(2) + // OGCG-NEXT: store ptr addrspace(2) %{{.*}}, ptr %{{.*}} +} + +// CIR: cir.func dso_local @{{.*test_nullptr.*}} +// LLVM: define dso_local void @{{.*test_nullptr.*}} +// OGCG: define dso_local void @{{.*test_nullptr.*}} +void test_nullptr() { + constexpr pi1_t null1 = nullptr; + pi2_t ptr = (pi2_t)null1; + // CIR: %[[#NULL1:]] = cir.const #cir.ptr : !cir.ptr + // CIR-NEXT: cir.store{{.*}} %[[#NULL1]], %{{[0-9]+}} : !cir.ptr, !cir.ptr> + // CIR-NEXT: %[[#NULL2:]] = cir.const #cir.ptr : !cir.ptr + // CIR-NEXT: cir.store{{.*}} %[[#NULL2]], %{{[0-9]+}} : !cir.ptr, !cir.ptr> + + // LLVM: store ptr addrspace(1) null, ptr %{{.*}} + // LLVM-NEXT: store ptr addrspace(2) null, ptr %{{.*}} + + // OGCG: store ptr addrspace(1) null, ptr %{{.*}} + // OGCG-NEXT: store ptr addrspace(2) null, ptr %{{.*}} +} + +// CIR: cir.func dso_local @{{.*test_side_effect.*}} +// LLVM: define dso_local void @{{.*test_side_effect.*}} +// OGCG: define dso_local void @{{.*test_side_effect.*}} +void test_side_effect(pi1_t b) { + pi2_t p = (pi2_t)(*b++, (int*)0); + // CIR: %[[#DEREF:]] = cir.load deref{{.*}} %{{[0-9]+}} : !cir.ptr>, !cir.ptr + // CIR: %[[#STRIDE:]] = cir.ptr_stride %[[#DEREF]], %{{[0-9]+}} : (!cir.ptr, !s32i) -> !cir.ptr + // CIR: %[[#NULL:]] = cir.const #cir.ptr : !cir.ptr + // CIR-NEXT: cir.store{{.*}} %[[#NULL]], %{{[0-9]+}} : !cir.ptr, !cir.ptr> + + // LLVM: %{{[0-9]+}} = getelementptr {{.*}}i32, ptr addrspace(1) %{{[0-9]+}}, i{{32|64}} 1 + // LLVM: store ptr addrspace(2) null, ptr %{{.*}} + + // OGCG: %{{.*}} = getelementptr{{.*}} i32, ptr addrspace(1) %{{.*}}, i32 1 + // OGCG: store ptr addrspace(2) null, ptr %{{.*}} +} diff --git a/clang/test/CIR/IR/invalid-addrspace.cir b/clang/test/CIR/IR/invalid-addrspace.cir index 8f188b840bdec..4b6a388b1e4a8 100644 --- a/clang/test/CIR/IR/invalid-addrspace.cir +++ b/clang/test/CIR/IR/invalid-addrspace.cir @@ -24,4 +24,3 @@ cir.func @address_space2(%p : !cir.ptr) { cir.func @address_space3(%p : !cir.ptr) { 
cir.return } - diff --git a/clang/test/Driver/hip-temps-linux.hip b/clang/test/Driver/hip-temps-linux.hip index 83a7528dd4560..e4c6282ba6fbd 100644 --- a/clang/test/Driver/hip-temps-linux.hip +++ b/clang/test/Driver/hip-temps-linux.hip @@ -1,18 +1,18 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// REQUIRES: system-linux +// UNSUPPORTED: system-windows // Check no temporary files or directores are left after compilation. // RUN: rm -rf %t/mytmp // RUN: mkdir -p %t/mytmp -// RUN: env TMPDIR="%t/mytmp" %clang --target=x86_64-linux-gnu -nogpulib -nogpuinc \ +// RUN: env TMP="%t/mytmp" TMPDIR="%t/mytmp" %clang --target=x86_64-linux-gnu -nogpulib -nogpuinc \ // RUN: --rocm-path=%S/Inputs/rocm -nostdinc -nostdlib -c \ // RUN: --offload-arch=gfx1030 -emit-llvm -v %s 2>&1 | \ -// RUN: FileCheck -check-prefixes=CHECK %s +// RUN: FileCheck -check-prefixes=CHECK -DOUTPUT_PATH="%t%{fs-sep}mytmp%{fs-sep}" %s // RUN: ls %t/mytmp >%t/mytmp.txt 2>&1 // RUN: touch %t/empty.txt // RUN: diff %t/mytmp.txt %t/empty.txt -// CHECK: -o {{.*}}/mytmp/hip-temps-linux-gfx1030-{{.*}}.bc +// CHECK: -o {{"?}}[[OUTPUT_PATH]]hip-temps-linux-gfx1030-{{.*}}.bc{{"?}} int main() {} diff --git a/clang/test/Modules/no-transitive-macro-change.cpp b/clang/test/Modules/no-transitive-macro-change.cpp new file mode 100644 index 0000000000000..fced26490c27f --- /dev/null +++ b/clang/test/Modules/no-transitive-macro-change.cpp @@ -0,0 +1,23 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header \ +// RUN: %t/a.h -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header \ +// RUN: %t/b.h -o %t/b.pcm -fmodule-file=%t/a.pcm +// RUN: echo "#define A2 44" >> %t/a.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header \ +// RUN: %t/a.h -o %t/a.v1.pcm +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header \ +// RUN: %t/b.h -o %t/b.v1.pcm -fmodule-file=%t/a.v1.pcm +// RUN: not 
diff %t/b.pcm %t/b.v1.pcm &> /dev/null + +//--- a.h +#pragma once +#define A 43 + +//--- b.h +#pragma once +import "a.h"; +#define B 43 +const int a = A; diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 3460a8675bf04..b9368db550805 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -655,3 +655,34 @@ void conditional_operator_lifetimebound_nested_deep(bool cond) { } // expected-note 4 {{destroyed here}} (void)*p; // expected-note 4 {{later used here}} } + +void parentheses(bool cond) { + MyObj* p; + { + MyObj a; + p = &((((a)))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + { + MyObj a; + p = ((GetPointer((a)))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + { + MyObj a, b, c, d; + p = &(cond ? (cond ? a // expected-warning {{object whose reference is captured does not live long enough}}. + : b) // expected-warning {{object whose reference is captured does not live long enough}}. + : (cond ? c // expected-warning {{object whose reference is captured does not live long enough}}. + : d)); // expected-warning {{object whose reference is captured does not live long enough}}. + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} + + { + MyObj a, b, c, d; + p = ((cond ? (((cond ? &a : &b))) // expected-warning 2 {{object whose reference is captured does not live long enough}}. + : &(((cond ? c : d))))); // expected-warning 2 {{object whose reference is captured does not live long enough}}. 
+ } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 9d61d56e078e3..601308c53f9a9 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -700,6 +700,23 @@ TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { EXPECT_THAT(Origin("v"), HasLoansTo({"a", "b"}, "p1")); } +TEST_F(LifetimeAnalysisTest, ExtraParenthesis) { + SetupTest(R"( + void target() { + MyObj a; + View x = ((View((((a)))))); + View y = ((View{(((x)))})); + View z = ((View(((y))))); + View p = ((View{((x))})); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "p1")); +} + // FIXME: Handle temporaries. TEST_F(LifetimeAnalysisTest, ViewFromTemporary) { SetupTest(R"( diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index 246a22c56c31a..856872425117a 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -646,6 +646,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0xC033: // 33 C0 : xor eax, eax case 0xC933: // 33 C9 : xor ecx, ecx case 0xD233: // 33 D2 : xor edx, edx + case 0xFF33: // 33 FF : xor edi, edi case 0x9066: // 66 90 : xchg %ax,%ax (Two-byte NOP) case 0xDB84: // 84 DB : test bl,bl case 0xC084: // 84 C0 : test al,al @@ -764,6 +765,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { switch (0x00FFFFFF & *(u32 *)address) { case 0x10b70f: // 0f b7 10 : movzx edx, WORD PTR [rax] + case 0x02b70f: // 0f b7 02 : movzx eax, WORD PTR [rdx] case 0xc00b4d: // 4d 0b c0 : or r8, r8 case 0xc03345: // 45 33 
c0 : xor r8d, r8d case 0xc08548: // 48 85 c0 : test rax, rax @@ -799,6 +801,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0xc9854d: // 4d 85 c9 : test r9, r9 case 0xc98b4c: // 4c 8b c9 : mov r9, rcx case 0xd12948: // 48 29 d1 : sub rcx, rdx + case 0xc22b4c: // 4c 2b c2 : sub r8, rdx case 0xca2b48: // 48 2b ca : sub rcx, rdx case 0xca3b48: // 48 3b ca : cmp rcx, rdx case 0xd12b48: // 48 2b d1 : sub rdx, rcx @@ -813,6 +816,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0xd9f748: // 48 f7 d9 : neg rcx case 0xc03145: // 45 31 c0 : xor r8d,r8d case 0xc93145: // 45 31 c9 : xor r9d,r9d + case 0xd23345: // 45 33 d2 : xor r10d, r10d case 0xdb3345: // 45 33 db : xor r11d, r11d case 0xc08445: // 45 84 c0 : test r8b,r8b case 0xd28445: // 45 84 d2 : test r10b,r10b diff --git a/compiler-rt/lib/interception/tests/interception_win_test.cpp b/compiler-rt/lib/interception/tests/interception_win_test.cpp index f11c1d1458556..e3dc4cfbe9b2c 100644 --- a/compiler-rt/lib/interception/tests/interception_win_test.cpp +++ b/compiler-rt/lib/interception/tests/interception_win_test.cpp @@ -841,6 +841,7 @@ const struct InstructionSizeData { { 1, {0xCC}, 0, "CC : int 3 i.e. 
registering weak functions)"}, { 2, {0x31, 0xC0}, 0, "31 C0 : xor eax, eax"}, { 2, {0x31, 0xC9}, 0, "31 C9 : xor ecx, ecx"}, + { 2, {0x33, 0xFF}, 0, "33 FF : xor edi, edi"}, { 2, {0x31, 0xD2}, 0, "31 D2 : xor edx, edx"}, { 2, {0x33, 0xC0}, 0, "33 C0 : xor eax, eax"}, { 2, {0x33, 0xC9}, 0, "33 C9 : xor ecx, ecx"}, @@ -895,6 +896,7 @@ const struct InstructionSizeData { { 3, {0x0f, 0xb6, 0x11}, 0, "0f b6 11 : movzx edx, BYTE PTR [rcx]"}, { 3, {0x0f, 0xb6, 0xc2}, 0, "0f b6 c2 : movzx eax, dl"}, { 3, {0x0f, 0xb6, 0xd2}, 0, "0f b6 d2 : movzx edx, dl"}, + { 3, {0x0f, 0xb7, 0x02}, 0, "0f b7 02 : movzx eax, WORD PTR [rdx]"}, { 3, {0x0f, 0xb7, 0x10}, 0, "0f b7 10 : movzx edx, WORD PTR [rax]"}, { 3, {0x0f, 0xbe, 0xd2}, 0, "0f be d2 : movsx edx, dl"}, { 3, {0x41, 0x8b, 0xc0}, 0, "41 8b c0 : mov eax, r8d"}, @@ -906,6 +908,7 @@ const struct InstructionSizeData { { 3, {0x45, 0x31, 0xc9}, 0, "45 31 c9 : xor r9d,r9d"}, { 3, {0x45, 0x33, 0xc0}, 0, "45 33 c0 : xor r8d, r8d"}, { 3, {0x45, 0x33, 0xc9}, 0, "45 33 c9 : xor r9d, r9d"}, + { 3, {0x45, 0x33, 0xd2}, 0, "45 33 d2 : xor r10d, r10d"}, { 3, {0x45, 0x33, 0xdb}, 0, "45 33 db : xor r11d, r11d"}, { 3, {0x45, 0x84, 0xc0}, 0, "45 84 c0 : test r8b,r8b"}, { 3, {0x45, 0x84, 0xd2}, 0, "45 84 d2 : test r10b,r10b"}, @@ -950,6 +953,7 @@ const struct InstructionSizeData { { 3, {0x49, 0xff, 0xc5}, 0, "49 ff c5 : inc r13"}, { 3, {0x49, 0xff, 0xc6}, 0, "49 ff c6 : inc r14"}, { 3, {0x49, 0xff, 0xc7}, 0, "49 ff c7 : inc r15"}, + { 3, {0x4c, 0x2b, 0xc2}, 0, "4c 2b c2 : sub r8, rdx"}, { 3, {0x4c, 0x8b, 0xc1}, 0, "4c 8b c1 : mov r8, rcx"}, { 3, {0x4c, 0x8b, 0xc9}, 0, "4c 8b c9 : mov r9, rcx"}, { 3, {0x4c, 0x8b, 0xd1}, 0, "4c 8b d1 : mov r10, rcx"}, diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 9d2f02189b8bd..3f7dd8e402b78 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -195,16 +195,14 @@ def push_dynamic_library_lookup_path(config, new_path): # Normalize the path 
for comparison if test_cc_resource_dir is not None: test_cc_resource_dir = os.path.realpath(test_cc_resource_dir) -if lit_config.debug: - lit_config.note(f"Resource dir for {config.clang} is {test_cc_resource_dir}") +lit_config.dbg(f"Resource dir for {config.clang} is {test_cc_resource_dir}") local_build_resource_dir = os.path.realpath(config.compiler_rt_output_dir) if test_cc_resource_dir != local_build_resource_dir and config.test_standalone_build_libs: if config.compiler_id == "Clang": - if lit_config.debug: - lit_config.note( - f"Overriding test compiler resource dir to use " - f'libraries in "{config.compiler_rt_libdir}"' - ) + lit_config.dbg( + f"Overriding test compiler resource dir to use " + f'libraries in "{config.compiler_rt_libdir}"' + ) # Ensure that we use the just-built static libraries when linking by # overriding the Clang resource directory. Additionally, we want to use # the builtin headers shipped with clang (e.g. stdint.h), so we diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 3197b3d7fd01b..54e41ece5f4d9 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -83,7 +83,7 @@ add_unittest_framework_library( ) set(libc_death_test_srcs LibcDeathTestExecutors.cpp) -if(${LIBC_TARGET_OS} STREQUAL "linux") +if(${LIBC_TARGET_OS} STREQUAL "linux" OR ${LIBC_TARGET_OS} STREQUAL "darwin") list(APPEND libc_death_test_srcs ExecuteFunctionUnix.cpp) endif() diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_def.inc b/libclc/clc/lib/generic/atomic/clc_atomic_def.inc index 14a09b1f09f5c..75561430b33ad 100644 --- a/libclc/clc/lib/generic/atomic/clc_atomic_def.inc +++ b/libclc/clc/lib/generic/atomic/clc_atomic_def.inc @@ -21,47 +21,50 @@ #ifdef __CLC_HAS_ATOMIC -#ifndef __CLC_PTR_CASTTYPE -#define __CLC_PTR_CASTTYPE __CLC_GENTYPE +#ifndef __CLC_CASTTYPE +#define __CLC_CASTTYPE __CLC_GENTYPE #endif #ifndef __CLC_AS_RETTYPE #define __CLC_AS_RETTYPE(x) x #endif +#ifndef __CLC_AS_CASTTYPE 
+#define __CLC_AS_CASTTYPE(x) x +#endif + #ifdef __CLC_NO_VALUE_ARG #define __CLC_DEFINE_ATOMIC(ADDRSPACE) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \ volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, \ int MemoryScope) { \ return __CLC_AS_RETTYPE(__CLC_IMPL_FUNCTION( \ - (ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, MemoryOrder, MemoryScope)); \ + (ADDRSPACE __CLC_CASTTYPE *)Ptr, MemoryOrder, MemoryScope)); \ } #elif defined(__CLC_INC_DEC) #define __CLC_DEFINE_ATOMIC(ADDRSPACE) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \ volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, \ int MemoryScope) { \ - return __CLC_AS_RETTYPE( \ - __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, \ - (__CLC_GENTYPE)1, MemoryOrder, MemoryScope)); \ + return __CLC_IMPL_FUNCTION(Ptr, (__CLC_GENTYPE)1, MemoryOrder, \ + MemoryScope); \ } #elif defined(__CLC_RETURN_VOID) #define __CLC_DEFINE_ATOMIC(ADDRSPACE) \ _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION( \ volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, \ int MemoryOrder, int MemoryScope) { \ - __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, Value, \ - MemoryOrder, MemoryScope); \ + __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_CASTTYPE *)Ptr, \ + __CLC_AS_CASTTYPE(Value), MemoryOrder, MemoryScope); \ } #else #define __CLC_DEFINE_ATOMIC(ADDRSPACE) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \ volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, \ int MemoryOrder, int MemoryScope) { \ - return __CLC_AS_RETTYPE( \ - __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, Value, \ - MemoryOrder, MemoryScope)); \ + return __CLC_AS_RETTYPE(__CLC_IMPL_FUNCTION( \ + (ADDRSPACE __CLC_CASTTYPE *)Ptr, __CLC_AS_CASTTYPE(Value), \ + MemoryOrder, MemoryScope)); \ } #endif diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_exchange.cl b/libclc/clc/lib/generic/atomic/clc_atomic_exchange.cl index ee80256d3dbb6..b2c26758103cd 100644 --- a/libclc/clc/lib/generic/atomic/clc_atomic_exchange.cl 
+++ b/libclc/clc/lib/generic/atomic/clc_atomic_exchange.cl @@ -14,10 +14,12 @@ #define __CLC_BODY #include -#undef __CLC_PTR_CASTTYPE +#undef __CLC_CASTTYPE #undef __CLC_AS_RETTYPE -#define __CLC_PTR_CASTTYPE __CLC_BIT_INTN +#undef __CLC_AS_CASTTYPE +#define __CLC_CASTTYPE __CLC_BIT_INTN #define __CLC_AS_RETTYPE(x) __CLC_AS_GENTYPE(x) +#define __CLC_AS_CASTTYPE __CLC_AS_S_GENTYPE #define __CLC_BODY #include diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_load.cl b/libclc/clc/lib/generic/atomic/clc_atomic_load.cl index f7fe2510569e4..af808553a7110 100644 --- a/libclc/clc/lib/generic/atomic/clc_atomic_load.cl +++ b/libclc/clc/lib/generic/atomic/clc_atomic_load.cl @@ -15,9 +15,9 @@ #define __CLC_BODY #include -#undef __CLC_PTR_CASTTYPE +#undef __CLC_CASTTYPE #undef __CLC_AS_RETTYPE -#define __CLC_PTR_CASTTYPE __CLC_BIT_INTN +#define __CLC_CASTTYPE __CLC_BIT_INTN #define __CLC_AS_RETTYPE(x) __CLC_AS_GENTYPE(x) #define __CLC_BODY diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_store.cl b/libclc/clc/lib/generic/atomic/clc_atomic_store.cl index a93d21e8430ce..66ae2ba98556d 100644 --- a/libclc/clc/lib/generic/atomic/clc_atomic_store.cl +++ b/libclc/clc/lib/generic/atomic/clc_atomic_store.cl @@ -15,8 +15,10 @@ #define __CLC_BODY #include -#undef __CLC_PTR_CASTTYPE -#define __CLC_PTR_CASTTYPE __CLC_BIT_INTN +#undef __CLC_CASTTYPE +#undef __CLC_AS_CASTTYPE +#define __CLC_CASTTYPE __CLC_BIT_INTN +#define __CLC_AS_CASTTYPE __CLC_AS_S_GENTYPE #define __CLC_BODY #include diff --git a/libcxx/test/selftest/dsl/dsl.sh.py b/libcxx/test/selftest/dsl/dsl.sh.py index 93f351f58eb4b..b8ee2ca3d6bb9 100644 --- a/libcxx/test/selftest/dsl/dsl.sh.py +++ b/libcxx/test/selftest/dsl/dsl.sh.py @@ -61,7 +61,7 @@ def setUp(self): self.litConfig = lit.LitConfig.LitConfig( progname="lit", path=[], - quiet=False, + diagnostic_level="note", useValgrind=False, valgrindLeakCheck=False, valgrindArgs=[], diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 
0840c46d7bfae..00fab6a73ba68 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -22,6 +22,7 @@ def _appendToSubstitution(substitutions, key, value): def configure(parameters, features, config, lit_config): note = lambda s: lit_config.note("({}) {}".format(config.name, s)) + debug = lambda s: lit_config.dbg("({}) {}".format(config.name, s)) config.environment = dict(os.environ) # Apply the actions supplied by parameters to the configuration first, since @@ -31,25 +32,23 @@ def configure(parameters, features, config, lit_config): actions = param.getActions(config, lit_config.params) for action in actions: action.applyTo(config) - if lit_config.debug: - note( - "Applied '{}' as a result of parameter '{}'".format( - action.pretty(config, lit_config.params), - param.pretty(config, lit_config.params), - ) + debug( + "Applied '{}' as a result of parameter '{}'".format( + action.pretty(config, lit_config.params), + param.pretty(config, lit_config.params), ) + ) # Then, apply the automatically-detected features. 
for feature in features: actions = feature.getActions(config) for action in actions: action.applyTo(config) - if lit_config.debug: - note( - "Applied '{}' as a result of implicitly detected feature '{}'".format( - action.pretty(config, lit_config.params), feature.pretty(config) - ) + debug( + "Applied '{}' as a result of implicitly detected feature '{}'".format( + action.pretty(config, lit_config.params), feature.pretty(config) ) + ) # Print the basic substitutions for sub in ("%{cxx}", "%{flags}", "%{compile_flags}", "%{link_flags}", "%{benchmark_flags}", "%{exec}"): diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py index 3fb30d82e0d24..88fc49160c56b 100644 --- a/libcxx/utils/libcxx/test/dsl.py +++ b/libcxx/utils/libcxx/test/dsl.py @@ -88,7 +88,7 @@ def _executeWithFakeConfig(test, commands): litConfig = lit.LitConfig.LitConfig( progname="lit", path=[], - quiet=False, + diagnostic_level="note", useValgrind=False, valgrindLeakCheck=False, valgrindArgs=[], diff --git a/lldb/source/Commands/CMakeLists.txt b/lldb/source/Commands/CMakeLists.txt index 69e4c45f0b8e5..33332f2d59a23 100644 --- a/lldb/source/Commands/CMakeLists.txt +++ b/lldb/source/Commands/CMakeLists.txt @@ -58,6 +58,8 @@ add_lldb_library(lldbCommands NO_PLUGIN_DEPENDENCIES lldbUtility lldbValueObject lldbVersion + CLANG_LIBS + clangFrontend ) add_dependencies(lldbCommands LLDBOptionsGen) diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index f27a95de484df..815cc9dada2c1 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -52,9 +52,6 @@ #include "lldb/Host/windows/PosixApi.h" #endif -#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" -#include "Plugins/Language/ObjC/ObjCLanguage.h" - #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DJB.h" diff --git a/lldb/source/ValueObject/CMakeLists.txt b/lldb/source/ValueObject/CMakeLists.txt index 2a61407521bec..f0fe7f374a506 100644 --- 
a/lldb/source/ValueObject/CMakeLists.txt +++ b/lldb/source/ValueObject/CMakeLists.txt @@ -1,4 +1,4 @@ -add_lldb_library(lldbValueObject +add_lldb_library(lldbValueObject NO_PLUGIN_DEPENDENCIES DILAST.cpp DILEval.cpp DILLexer.cpp @@ -34,6 +34,4 @@ add_lldb_library(lldbValueObject lldbSymbol lldbTarget lldbUtility - lldbPluginCPlusPlusLanguage - lldbPluginObjCLanguage ) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 3418df1dede11..22075c8e73b3f 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1017,6 +1017,15 @@ supported for the ``amdgcn`` target. `ptr addrspace(7)` directly, which produces a buffer fat pointer with an initial offset of 0 and prevents the address space cast from being rewritten away. + The ``align`` attribute on operations from buffer fat pointers is deemed to apply + to all components of the pointer - that is, an ``align 4`` load is expected to + both have the offset be a multiple of 4 and to have a base pointer with an + alignment of 4. + + This componentwise definition of alignment is needed to allow for promotion of + aligned loads to ``s_buffer_load``, which requires that both the base pointer and + offset be appropriately aligned. + **Buffer Resource** The buffer resource pointer, in address space 8, is the newer form for representing buffer descriptors in AMDGPU IR, replacing their @@ -1040,6 +1049,25 @@ supported for the ``amdgcn`` target. (bits `127:96`). The specific interpretation of these fields varies by the target architecture and is detailed in the ISA descriptions. + When buffer resources are passed to buffer intrinsics such as + ``llvm.amdgcn.raw.ptr.buffer.load`` or + ``llvm.amdgcn.struct.ptr.buffer.store``, the ``align`` attribute on the + pointer is assumed to apply to both the offset and the base pointer value. + That is, ``align 8`` means that both the base address within the ``ptr + addrspace(8)`` and the ``offset`` argument have their three lowest bits set + to 0.
If the stride of the resource is nonzero, the stride must be a multiple + of the given alignment. + + In other words, the ``align`` attribute specifies the alignment of the effective + address being loaded from/stored to *and* acts as a guarantee that this is + not achieved from adding lower-alignment parts (as hardware may not always + allow for such an addition). For example, if a buffer resource has the base + address ``0xfffe`` and is accessed with a ``raw.ptr.buffer.load`` with an offset + of ``2``, the load must **not** be marked ``align 4`` (even though the + effective address ``0x10000`` is so aligned) as this would permit the compiler + to make incorrect transformations (such as promotion to ``s_buffer_load``, + which requires such componentwise alignment). + **Buffer Strided Pointer** The buffer index pointer is an experimental address space. It represents a 128-bit buffer descriptor and a 32-bit offset, like the **Buffer Fat @@ -1058,6 +1086,12 @@ supported for the ``amdgcn`` target. index and offset values are both 0. This prevents the address space cast from being rewritten away. + As with buffer fat pointers, alignment of a buffer strided pointer applies to + both the base pointer address and the offset. In addition, the alignment also + constrains the stride of the pointer. That is, if you do an ``align 4`` load from + a buffer strided pointer, this means that the base pointer is ``align(4)``, that + the offset is a multiple of 4 bytes, and that the stride is a multiple of 4. + **Streamout Registers** Dedicated registers used by the GS NGG Streamout Instructions.
The register file is modelled as a memory in a distinct address space because it is indexed diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index e57ed24a45065..2ebccee6aa68c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -628,10 +628,15 @@ void CodeViewDebug::beginModule(Module *M) { // When emitting only compiler information, we may have only NoDebug CUs, // which would be skipped by debug_compile_units_begin. NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + if (CUs->operands().empty()) { + Asm = nullptr; + return; + } Node = *CUs->operands().begin(); } - const auto *CU = cast(Node); - DISourceLanguageName Lang = CU->getSourceLanguage(); + + TheCU = cast(Node); + DISourceLanguageName Lang = TheCU->getSourceLanguage(); CurrentSourceLanguage = Lang.hasVersionedName() ? MapDWARFLanguageToCVLang( @@ -639,7 +644,7 @@ void CodeViewDebug::beginModule(Module *M) { : MapDWARFLanguageToCVLang( static_cast(Lang.getName())); if (!M->getCodeViewFlag() || - CU->getEmissionKind() == DICompileUnit::NoDebug) { + TheCU->getEmissionKind() == DICompileUnit::NoDebug) { Asm = nullptr; return; } @@ -900,11 +905,10 @@ void CodeViewDebug::emitCompilerInformation() { OS.AddComment("CPUType"); OS.emitInt16(static_cast(TheCPU)); - NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); - const MDNode *Node = *CUs->operands().begin(); - const auto *CU = cast(Node); + StringRef CompilerVersion = "0"; + if (TheCU) + CompilerVersion = TheCU->getProducer(); - StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); for (int N : FrontVer.Part) { diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index c2b878e52e1c3..7fd2cec8c74f2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ 
b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -98,6 +98,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// The codeview CPU type used by the translation unit. codeview::CPUType TheCPU; + const DICompileUnit *TheCU = nullptr; + /// The AsmPrinter used for emitting compiler metadata. When only compiler /// info is being emitted, DebugHandlerBase::Asm may be null. AsmPrinter *CompilerInfoAsm = nullptr; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 5160cc999d63b..d78c4c707d168 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -2218,7 +2218,7 @@ class VOP3PX2e op, bits<8> LdScaleOp, VOP3PWMMA_Profile P> : Enc128, VO let Inst{23-16} = LdScaleOp; let Inst{40-32} = scale_src0; let Inst{49-41} = scale_src1; - let Inst{58-50} = 0; // scale src2 + let Inst{58-50} = 0x100; // scale src2 = vgpr0 (dummy) let Inst{59} = matrix_b_scale{0}; // scale_op_sel_hi(0) let Inst{60} = 0; // scale_op_sel_hi(1) let Inst{63-61} = {0, matrix_a_scale_fmt{1-0}}; // neg (lo) @@ -2433,6 +2433,15 @@ multiclass VOP3P_Real_with_name_gfx12 op, string asmName = !cast(NAME).Mnemonic> : VOP3P_Real_with_name; +multiclass VOP3P_Real_LD_SCALE_gfx1250 op> { + defvar ps = !cast(NAME); + def _gfx1250 : + VOP3P_Real_Gen, + VOP3Pe_gfx11_gfx12 { + let Inst{58-50} = 0x100; // scale src2 = vgpr0 (dummy) + } +} + defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">; defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">; @@ -2462,8 +2471,8 @@ defm V_FMA_MIX_F32_BF16 : VOP3P_Realtriple; defm V_FMA_MIXLO_BF16 : VOP3P_Realtriple; defm V_FMA_MIXHI_BF16 : VOP3P_Realtriple; -defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3P_Real_gfx1250<0x35>; -defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_gfx1250<0x3a>; +defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3P_Real_LD_SCALE_gfx1250<0x35>; +defm 
V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_LD_SCALE_gfx1250<0x3a>; let AssemblerPredicate = isGFX1250Plus in def : AMDGPUMnemonicAlias<"v_fma_mix_f32_f16", "v_fma_mix_f32">; diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 21a113572ce93..c19eed122fe63 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -2536,7 +2536,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); (void)TRI; // Silence unused warning in non-assert builds. - Register FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = STI.getFramePointerReg(); ARMSubtarget::PushPopSplitVariation PushPopSplit = STI.getPushPopSplitVariation(MF); @@ -2783,7 +2783,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); - if (HasFP) { + // Save the FP if: + // 1. We currently need it (HasFP), OR + // 2. We might need it later due to stack realignment from aligned DPRCS2 + // saves (which will make hasFP() become true in emitPrologue). + if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) { SavedRegs.set(FramePtr); // If the frame pointer is required by the ABI, also spill LR so that we // emit a complete frame record. 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 92fae71121a81..f28640ce7b107 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2510,9 +2510,44 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) { Chain = DAG.getStackArgumentTokenFactor(Chain); - if (ByValTempChain) + if (ByValTempChain) { + // In case of large byval copies, re-using the stackframe for tail-calls + // can lead to overwriting incoming arguments on the stack. Force + // loading these stack arguments before the copy to avoid that. + SmallVector IncomingLoad; + for (unsigned I = 0; I < OutVals.size(); ++I) { + if (Outs[I].Flags.isByVal()) + continue; + + SDValue OutVal = OutVals[I]; + LoadSDNode *OutLN = dyn_cast_or_null(OutVal); + if (!OutLN) + continue; + + FrameIndexSDNode *FIN = + dyn_cast_or_null(OutLN->getBasePtr()); + if (!FIN) + continue; + + if (!MFI.isFixedObjectIndex(FIN->getIndex())) + continue; + + for (const CCValAssign &VA : ArgLocs) { + if (VA.isMemLoc()) + IncomingLoad.push_back(OutVal.getValue(1)); + } + } + + // Update the chain to force loads for potentially clobbered argument + // loads to happen before the byval copy. 
+ if (!IncomingLoad.empty()) { + IncomingLoad.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad); + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain); + } AfterFormalArgLoads = true; } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 1cbedb7d141e2..1024e55f912c7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2691,6 +2691,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } break; } + case ISD::SCALAR_TO_VECTOR: + if (Subtarget->enablePExtCodeGen()) { + MVT SrcVT = Node->getOperand(0).getSimpleValueType(); + if (VT == MVT::v2i32 && SrcVT == MVT::i64) { + ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); + CurDAG->RemoveDeadNode(Node); + return; + } + } + break; case ISD::INSERT_SUBVECTOR: case RISCVISD::TUPLE_INSERT: { SDValue V = Node->getOperand(0); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5a081d54d0726..d086a2a4a3057 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -516,8 +516,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand); - setOperationAction(ISD::LOAD, MVT::v2i16, Custom); - setOperationAction(ISD::LOAD, MVT::v4i8, Custom); } else { VTs.append({MVT::v2i16, MVT::v4i8}); } @@ -14757,21 +14755,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, return; } - if (Subtarget.is64Bit() && Subtarget.enablePExtCodeGen()) { - SDLoc DL(N); - SDValue ExtLoad = - DAG.getExtLoad(ISD::SEXTLOAD, DL, MVT::i64, Ld->getChain(), - Ld->getBasePtr(), MVT::i32, Ld->getMemOperand()); - if (N->getValueType(0) == MVT::v2i16) { - Results.push_back(DAG.getBitcast(MVT::v4i16, ExtLoad)); 
- Results.push_back(ExtLoad.getValue(1)); - } else if (N->getValueType(0) == MVT::v4i8) { - Results.push_back(DAG.getBitcast(MVT::v8i8, ExtLoad)); - Results.push_back(ExtLoad.getValue(1)); - } - return; - } - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b9d4ff41c0755..835b0995cc4fc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7633,6 +7633,14 @@ createWidenInductionRecipes(VPInstruction *PhiR, VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep()); + + // Update wide induction increments to use the same step as the corresponding + // wide induction. This enables detecting induction increments directly in + // VPlan and removes redundant splats. + using namespace llvm::VPlanPatternMatch; + if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue()))) + PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step); + PHINode *Phi = cast(PhiR->getUnderlyingInstr()); return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(), IndDesc, PhiR->getDebugLoc()); @@ -8473,20 +8481,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( "entry block must be set to a VPRegionBlock having a non-empty entry " "VPBasicBlock"); - // Update wide induction increments to use the same step as the corresponding - // wide induction. This enables detecting induction increments directly in - // VPlan and removes redundant splats. 
- for (const auto &[Phi, ID] : Legal->getInductionVars()) { - auto *IVInc = cast( - Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); - if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add) - continue; - VPWidenInductionRecipe *WideIV = - cast(RecipeBuilder.getRecipe(Phi)); - VPRecipeBase *R = RecipeBuilder.getRecipe(IVInc); - R->setOperand(1, WideIV->getStepValue()); - } - // TODO: We can't call runPass on these transforms yet, due to verifier // failures. VPlanTransforms::addExitUsersForFirstOrderRecurrences(*Plan, Range); @@ -8627,6 +8621,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { void LoopVectorizationPlanner::adjustRecipesForReductions( VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) { using namespace VPlanPatternMatch; + VPTypeAnalysis TypeInfo(*Plan); VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion(); VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock(); VPBasicBlock *MiddleVPBB = Plan->getMiddleBlock(); @@ -8711,8 +8706,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator()); VecOp = FMulRecipe; } else if (PhiR->isInLoop() && Kind == RecurKind::AddChainWithSubs && - CurrentLinkI->getOpcode() == Instruction::Sub) { - Type *PhiTy = PhiR->getUnderlyingValue()->getType(); + match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) { + Type *PhiTy = TypeInfo.inferScalarType(PhiR); auto *Zero = Plan->getConstantInt(PhiTy, 0); VPWidenRecipe *Sub = new VPWidenRecipe( Instruction::Sub, {Zero, CurrentLink->getOperand(1)}, {}, @@ -8788,7 +8783,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor( cast(PhiR->getUnderlyingInstr())); - Type *PhiTy = PhiR->getUnderlyingValue()->getType(); + Type *PhiTy = TypeInfo.inferScalarType(PhiR); // If tail is folded by masking, introduce selects between the phi // and the users outside the 
vector region of each reduction, at the // beginning of the dedicated latch block. diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index aa2785252d376..f34c99b84b1aa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -496,6 +496,12 @@ m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) { return AllRecipe_commutative_match(Op0, Op1); } +template +inline AllRecipe_match m_Add(const Op0_t &Op0, + const Op1_t &Op1) { + return m_Binary(Op0, Op1); +} + template inline AllRecipe_commutative_match m_c_Add(const Op0_t &Op0, const Op1_t &Op1) { diff --git a/llvm/test/CodeGen/ARM/byval_struct_copy_tailcall.ll b/llvm/test/CodeGen/ARM/byval_struct_copy_tailcall.ll new file mode 100644 index 0000000000000..50c676c425ce7 --- /dev/null +++ b/llvm/test/CodeGen/ARM/byval_struct_copy_tailcall.ll @@ -0,0 +1,69 @@ +; RUN: llc -mtriple thumbv7em-apple-darwin -o - < %s | FileCheck %s + +%"struct.s1" = type { [19 x i32] } + +define void @f0(ptr byval(%"struct.s1") %0, ptr %1) #1 { +; CHECK-LABEL: _f0: @ @f0 +; CHECK-NEXT: @ %bb.0: +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #76 +; CHECK-NEXT: add.w r9, sp, #84 +; CHECK-NEXT: stm.w r9, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: add r1, sp, #84 +; CHECK-NEXT: movs r2, #76 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: LBB0_1: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr r4, [r1], #4 +; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: str r4, [r3], #4 +; CHECK-NEXT: bne LBB0_1 +; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: add.w r1, r0, #12 +; CHECK-NEXT: add r2, sp, #100 +; CHECK-NEXT: ldr r0, [sp, #160] +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr 
r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldr r3, [r1], #4 +; CHECK-NEXT: str r3, [r2], #4 +; CHECK-NEXT: ldm.w sp, {r1, r2, r3} +; CHECK-NEXT: add sp, #76 +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: b.w _f1 + tail call void @f1(ptr %1, ptr byval(%"struct.s1") %0) + ret void +} + +declare void @f1(ptr, ptr) + +attributes #1 = { nounwind "frame-pointes"="non-leaf" } diff --git a/llvm/test/CodeGen/ARM/save-fp-with-non-leaf.ll b/llvm/test/CodeGen/ARM/save-fp-with-non-leaf.ll new file mode 100644 index 0000000000000..fefa5a0a68020 --- /dev/null +++ b/llvm/test/CodeGen/ARM/save-fp-with-non-leaf.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK +target datalayout = "e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" +target triple = "thumbv7-apple-darwin" + +; This test checks that even with NEON register induced stack re-alignment, and +; with the frame-pointer=non-leaf option, that we still save fp aka r7 in the +; prolog as required. 
+ +define fastcc i32 @test_save_fp() #0 { +; CHECK-LABEL: test_save_fp: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, r7, lr} +; CHECK-NEXT: add r7, sp, #4 +; CHECK-NEXT: sub.w r4, sp, #64 +; CHECK-NEXT: bfc r4, #0, #4 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: vst1.64 {d8, d9, d10, d11}, [r4:128]! +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: vst1.64 {d12, d13, d14, d15}, [r4:128] +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: @ InlineAsm Start +; CHECK-NEXT: vld1.16 {d0, d1, d2, d3}, [r0] +; CHECK-NEXT: vld1.16 {d4, d5, d6, d7}, [r0] +; CHECK-NEXT: vabdl.s16 q4, d0, d4 +; CHECK-EMPTY: +; CHECK-NEXT: @ InlineAsm End +; CHECK-NEXT: vld1.64 {d8, d9, d10, d11}, [r4:128]! +; CHECK-NEXT: vld1.64 {d12, d13, d14, d15}, [r4:128] +; CHECK-NEXT: subs r4, r7, #4 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: pop {r4, r7, pc} + tail call void asm sideeffect "vld1.i16 {q0,q1}, [$0]\0Avld1.i16 {q2,q3}, [$1]\0Avabdl.s16 q4, d0, d4\0A", "r,r,r,~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{memory}"(ptr null, ptr null, ptr null) + ret i32 0 +} + +attributes #0 = { "frame-pointer"="non-leaf" } diff --git a/llvm/test/CodeGen/X86/basic-block-sections-list.ll b/llvm/test/CodeGen/X86/basic-block-sections-list.ll index d652a540f3e9c..d17182131168c 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-list.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-list.ll @@ -5,9 +5,9 @@ ; RUN: echo 'v1' > %t ; RUN: echo 'f _Z3foob' >> %t ;; -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t > %bbsections -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections > %orig -; RUN: diff -u %orig %bbsections +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t > %t.bbsections +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections > %t.orig +; RUN: diff -u %t.orig %t.bbsections define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 diff --git a/llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll 
b/llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll new file mode 100644 index 0000000000000..51435b10fdc2a --- /dev/null +++ b/llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s + +; CHECK: .file "" +; CHECK-NEXT: .section .debug$S,"dr" +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: .long 4 # Debug section magic +; CHECK-NEXT: .long 241 +; CHECK-NEXT: .long .Ltmp1-.Ltmp0 # Subsection size +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .short .Ltmp3-.Ltmp2 # Record length +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: .short 4353 # Record kind: S_OBJNAME +; CHECK-NEXT: .long 0 # Signature +; CHECK-NEXT: .byte 0 # Object name +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: .short .Ltmp5-.Ltmp4 # Record length +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: .short 4412 # Record kind: S_COMPILE3 +; CHECK-NEXT: .long 3 # Flags and language +; CHECK-NEXT: .short 208 # CPUType +; CHECK-NEXT: .short 0 # Frontend version +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 22000 # Backend version +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .asciz "0" # Null-terminated compiler version string +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: .p2align 2, 0x0 + +!llvm.dbg.cu = !{} +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s index 8185b77beb935..fcfff9ac5b63d 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s @@ -4,1906 +4,1906 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] +// GFX12-ERR: :[[@LINE-1]]:1: error: 
instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on 
this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x00,0x5d,0xcc,0x00,0x05,0x12,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x04,0x5d,0xcc,0x00,0x05,0x12,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_a_reuse ; encoding: [0x04,0x20,0x5d,0xcc,0x00,0x05,0x12,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: 
error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse ; encoding: [0x04,0x40,0x5d,0xcc,0x00,0x05,0x12,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: 
:[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x62,0xcc,0x00,0x11,0x42,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x62,0xcc,0x00,0x11,0x42,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x62,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x62,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: 
instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x62,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_bf16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x62,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: 
error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x63,0xcc,0x00,0x11,0x42,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x63,0xcc,0x00,0x11,0x42,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x63,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x63,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x63,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16_16x16x32_bf16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x63,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] ; encoding: 
[0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x1a,0x01,0x64,0xcc,0x00,0x11,0x42,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // 
GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x1a,0x02,0x64,0xcc,0x00,0x11,0x42,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x1a,0x00,0x64,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x1a,0x04,0x64,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x1a,0x20,0x64,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_bf16f32_16x16x32_bf16 v[26:29], 
v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_bf16f32_16x16x32_bf16 v[26:29], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x1a,0x40,0x64,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 
v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6a,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6a,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6a,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6a,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not 
supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6b,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: 
instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6b,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6b,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_fp8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6b,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6c,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: 
[0x10,0x04,0x6c,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6c,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_fp8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6c,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], 
v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6d,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6d,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not 
supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x6d,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x64_bf8_bf8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x6d,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: 
error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6e,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6e,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6e,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 
v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6e,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction 
requires wavesize=32 v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x6f,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x6f,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x6f,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_fp8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x6f,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: 
error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x70,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: 
:[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x70,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x70,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_fp8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x70,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x71,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: 
[0x10,0x04,0x71,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x71,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x64_bf8_bf8 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x71,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], 1 ; 
encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x06,0x1a] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x72,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: 
v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x72,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: 
v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x60,0xcc,0x00,0x11,0x42,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x60,0xcc,0x00,0x11,0x42,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x60,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x60,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], 
v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x60,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x32_f16 v[16:23], v[0:7], v[8:15], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x60,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 
neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x01,0x61,0xcc,0x00,0x11,0x42,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x02,0x61,0xcc,0x00,0x11,0x42,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x61,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// 
WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x61,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x61,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x32_f16 v[16:19], v[0:7], v[8:15], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x61,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x66,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction 
not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x66,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x66,0xcc,0x00,0x11,0x82,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x66,0xcc,0x00,0x11,0x82,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x68,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x68,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: 
[0x18,0x01,0x68,0xcc,0x00,0x11,0x72,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16_16x16x64_bf16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x68,0xcc,0x00,0x11,0x72,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x66,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x66,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: 
v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x69,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x69,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x69,0xcc,0x00,0x11,0x82,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x69,0xcc,0x00,0x11,0x82,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16f32_16x16x64_bf16 v[24:31], 
v[0:7], v[8:23], v32 matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x69,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_bf16f32_16x16x64_bf16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x69,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x73,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x73,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: 
instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x73,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x73,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x74,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x74,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: 
error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x74,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_fp8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x74,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x75,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x75,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x75,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_fp8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x75,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x76,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] 
index_key:1 ; encoding: [0x18,0x08,0x76,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse ; encoding: [0x18,0x20,0x76,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x128_bf8_bf8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x76,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x77,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on 
this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x77,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x77,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x77,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x78,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], 
v[28:29] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x78,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x78,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_fp8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x78,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x79,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: 
instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x79,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x79,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_fp8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x79,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] ; encoding: [0x18,0x00,0x7a,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: 
error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] index_key:1 ; encoding: [0x18,0x08,0x7a,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_a_reuse ; encoding: [0x18,0x20,0x7a,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] matrix_b_reuse ; encoding: [0x18,0x40,0x7a,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] index_key:1 ; encoding: [0x18,0x08,0x7b,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[1,0,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] neg_lo:[0,1,0] ; encoding: [0x18,0x00,0x7b,0xcc,0x00,0x11,0x82,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] 
matrix_a_reuse ; encoding: [0x18,0x20,0x7b,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_i32_16x16x128_iu8 v[24:31], v[0:7], v[8:23], v[32:33] matrix_b_reuse ; encoding: [0x18,0x40,0x7b,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 ; encoding: [0x18,0x00,0x65,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 index_key:1 ; encoding: [0x18,0x08,0x65,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_f16 
v[24:31], v[0:7], v[8:23], v32 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x65,0xcc,0x00,0x11,0x82,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x65,0xcc,0x00,0x11,0x82,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_a_reuse ; encoding: [0x18,0x20,0x65,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f32_16x16x64_f16 v[24:31], v[0:7], v[8:23], v32 matrix_b_reuse ; encoding: [0x18,0x40,0x65,0xcc,0x00,0x11,0x82,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 +// GFX12-ERR: :[[@LINE-1]]:1: error: 
instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 ; encoding: [0x18,0x00,0x67,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 index_key:1 ; encoding: [0x18,0x08,0x67,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x18,0x01,0x67,0xcc,0x00,0x11,0x72,0x3c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x18,0x02,0x67,0xcc,0x00,0x11,0x72,0x5c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x64_f16 
v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_a_reuse ; encoding: [0x18,0x20,0x67,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_swmmac_f16_16x16x64_f16 v[24:27], v[0:7], v[8:23], v28 matrix_b_reuse ; encoding: [0x18,0x40,0x67,0xcc,0x00,0x11,0x72,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] ; encoding: [0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_BF8 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x08,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: 
error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:19], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_FP6 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:19], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x10,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:19], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_BF6 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:19], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x18,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:15], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_FP4 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:15], v[24:39], v[40:47] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x20,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] matrix_b_fmt:MATRIX_FMT_BF8 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: 
[0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x0c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47] matrix_b_fmt:MATRIX_FMT_FP6 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x14] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47] matrix_b_fmt:MATRIX_FMT_BF6 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:31], v[40:47] matrix_b_fmt:MATRIX_FMT_FP4 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:31], v[40:47] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x40,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47] 
matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x08,0x33,0xcc,0x08,0x31,0xa2,0x14] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], 1.0 ; encoding: [0x00,0x00,0x33,0xcc,0x08,0x31,0xca,0x03] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_lo:[0,0,1] ; encoding: [0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x84] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47] neg_hi:[0,0,1] ; encoding: [0x00,0x04,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU 
+// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 s1, s2 -// GFX1250: v_wmma_ld_scale_paired_b32 s1, s2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 s1, s2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 2, -4 -// GFX1250: v_wmma_ld_scale_paired_b32 2, -4 ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 2, -4 ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: 
instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse -// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse -// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: 
v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse -// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse -// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: 
[0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x08] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x28] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x48] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: 
:[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// 
GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse -// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x28] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] -// GFX1250: 
v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 2, -4 -// GFX1250: v_wmma_ld_scale16_paired_b64 2, -4 ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 2, -4 ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: 
:[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse -// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse -// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: 
:[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse -// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse -// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: 
[0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x08] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x28] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x48] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4c] +// 
WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04] +// WAVESIZE-ERR: 
:[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse -// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x28] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 
v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// 
GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP8 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 
v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP8 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] +// 
WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: 
[0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 
matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW0 -// GFX1250: 
v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this 
GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 
v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; 
encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 
matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: 
[0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: 
v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP8 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] 
matrix_a_fmt:MATRIX_FMT_FP6 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP8 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: 
[0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 -// GFX1250: 
v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not 
supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: 
[0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], 
v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// 
GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], 
v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: 
instruction requires wavesize=32 v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 +// 
GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x84,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x84,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 
v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x84,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x84,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: 
:[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x85,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x85,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x85,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_fp8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x85,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x86,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x86,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x86,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_fp8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_fp8 v[16:19], 
v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x86,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: 
v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x87,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_a_reuse ; encoding: [0x10,0x20,0x87,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], v[16:19] matrix_b_reuse ; encoding: [0x10,0x40,0x87,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 
v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x80,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 
v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x80,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x80,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x80,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on 
this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x81,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x81,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// 
GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x81,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_fp8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x81,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x82,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x82,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] 
matrix_a_reuse ; encoding: [0x10,0x20,0x82,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_fp8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x82,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_bf8 
v[16:23], v[0:15], v[8:23], 1.0 neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_lo:[0,0,1] ; encoding: [0x10,0x00,0x83,0xcc,0x00,0x11,0x42,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] neg_hi:[0,0,1] ; encoding: [0x10,0x04,0x83,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_a_reuse ; encoding: [0x10,0x20,0x83,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse +// 
GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_16x16x128_bf8_bf8 v[16:23], v[0:15], v[8:23], v[16:23] matrix_b_reuse ; encoding: [0x10,0x40,0x83,0xcc,0x00,0x11,0x42,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x1b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], 1.0 neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0xca,0x9b] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] +// GFX12-ERR: 
:[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x88,0xcc,0x00,0x05,0x12,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU // GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: 
instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], 
v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: 
error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 
v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: 
[0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: 
instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported 
on this GPU +// GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -// WAVESIZE-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: 
[0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 
v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW0 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on 
this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: 
[0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] 
matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: 
[0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E8 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] -// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -// WAVESIZE-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 -// GFX12-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +// WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt index 
a409dac321f83..5d73cbd512edb 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt @@ -586,233 +586,233 @@ 0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c # GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x3c] -0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 2, -4 ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x00] +0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 2, -4 ; encoding: [0x00,0x00,0x3a,0xcc,0x82,0x88,0x01,0x04] -0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00] +0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04] -0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00] +0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04] -0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00] +0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04] -0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00] +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 
s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08 -# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c +# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c] -0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08 -# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08] +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c +# GFX1250: v_wmma_ld_scale16_paired_b64 s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c] -0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x00] +0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 s[2:3], s[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x08,0x00,0x04] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04] -0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x28 -# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x28] +0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2c +# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 
matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x3a,0xcc,0x02,0x09,0x02,0x2c] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x08 -# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x08] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0c +# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x0c] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x48 -# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x48] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4c +# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x4c] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x28 -# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x28] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2c +# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x2c] -0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00] +0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04] -0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00 -# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00] 
+0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04 +# GFX1250: v_wmma_ld_scale16_paired_b64 v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04] -0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 2, -4 ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x00] +0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 2, -4 ; encoding: [0x00,0x00,0x35,0xcc,0x82,0x88,0x01,0x04] -0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00] +0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04] -0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00] +0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04] -0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00] +0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04] -0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00] +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08 -# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08] 
+0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c +# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c] -0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08 -# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08] +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c +# GFX1250: v_wmma_ld_scale_paired_b32 s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c] -0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 s1, s2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x00] +0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 s1, s2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x04,0x00,0x04] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04] -0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x28 -# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x28] +0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2c +# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 matrix_a_reuse matrix_b_reuse ; encoding: [0x00,0x6a,0x35,0xcc,0x01,0x05,0x02,0x2c] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x08 -# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x08] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c +# GFX1250: 
v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x48 -# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x48] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4c +# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x4c] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x28 -# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x28] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2c +# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x2c] -0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00] +0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04] -0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00 -# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00] +0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04 +# GFX1250: v_wmma_ld_scale_paired_b32 v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: 
[0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14 +# GFX1250: 
v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] 
-0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] -0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], 
v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] 
+0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s[0:1], s[0:1] matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s[0:1], s[0:1] matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s[2:3], s[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] 
matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v[2:3], v[4:5] matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], 
v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: 
[0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale16_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x18,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:11], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x10,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c -# GFX1250: 
v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x1c] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:11], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP6 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x14] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: 
v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x08,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] 
-0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_BF8 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x0c] -0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: 
[0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:15], v[0:7], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x00,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:15], v[0:7], v[0:7], s0, s0 matrix_b_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[0:7], v[0:15], v[0:7], s0, s0 matrix_a_fmt:MATRIX_FMT_FP4 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x20,0x33,0xcc,0x00,0x01,0x02,0x04] -0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] 
+0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], s1, s2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] +0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:35], v[40:47], v1, v2 matrix_a_fmt:MATRIX_FMT_BF8 matrix_b_fmt:MATRIX_FMT_FP6 
matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x0c,0x33,0xcc,0x08,0x31,0xa2,0x94] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] 
-0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] -0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 -# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] +0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04 +# GFX1250: v_wmma_scale_f32_16x16x128_f8f6f4 v[0:7], v[8:23], v[24:39], v[40:47], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x00,0x33,0xcc,0x08,0x31,0xa2,0x04] 0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b # GFX1250: v_wmma_f16_16x16x128_bf8_bf8 v[16:19], v[0:15], v[8:23], 1.0 ; encoding: [0x10,0x00,0x87,0xcc,0x00,0x11,0xca,0x1b] @@ -1000,92 +1000,92 @@ 0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c # GFX1250: v_wmma_f32_32x16x128_f4 v[4:19], v[0:15], v[2:9], v[4:19] neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x88,0xcc,0x00,0x05,0x12,0x9c] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], 
v[8:23], v[0:7], v[0:15], s0, s0 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_reuse ; encoding: [0x00,0x20,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 
v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s0, s0 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x35,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: 
[0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s1, s2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x04,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 
matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x35,0xcc,0x01,0x05,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x40,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x20,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x35,0xcc,0x01,0x05,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: 
[0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v1, v2 matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x35,0xcc,0x01,0x05,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] 
matrix_a_reuse ; encoding: [0x00,0x20,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x08,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_a_reuse ; encoding: [0x00,0x28,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x00,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x04,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# 
GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 ; encoding: [0x00,0x00,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x08,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] +0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[0:7], v[0:15], s[0:1], s[0:1] matrix_b_scale:MATRIX_SCALE_ROW1 matrix_b_reuse ; encoding: [0x00,0x40,0x3a,0xcc,0x00,0x00,0x00,0x0c,0x00,0x40,0x88,0xcc,0x08,0x01,0x02,0x1c] -0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], s[2:3], s[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x08,0x00,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] 
-0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 matrix_a_reuse matrix_b_reuse neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x68,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: [0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x08,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] +0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale:MATRIX_SCALE_ROW1 matrix_b_scale:MATRIX_SCALE_ROW1 neg_lo:[0,0,1] neg_hi:[0,0,1] ; encoding: 
[0x00,0x08,0x3a,0xcc,0x02,0x09,0x02,0x0c,0x00,0x44,0x88,0xcc,0x08,0x31,0xa2,0x9c] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x40,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x44,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x20,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_a_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x00,0x3a,0xcc,0x02,0x09,0x02,0x24,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] -0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E4M3 ; encoding: [0x00,0x02,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] 
-0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c -# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x00,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] +0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c +# GFX1250: v_wmma_scale16_f32_32x16x128_f4 v[0:15], v[8:23], v[24:31], v[40:55], v[2:3], v[4:5] matrix_b_scale_fmt:MATRIX_SCALE_FMT_E5M3 ; encoding: [0x00,0x01,0x3a,0xcc,0x02,0x09,0x02,0x04,0x00,0x40,0x88,0xcc,0x08,0x31,0xa2,0x1c] diff --git a/llvm/utils/lit/lit/LitConfig.py b/llvm/utils/lit/lit/LitConfig.py index 8cef3c1fd8569..71dad85bbaddd 100644 --- a/llvm/utils/lit/lit/LitConfig.py +++ b/llvm/utils/lit/lit/LitConfig.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import inspect import os +import enum import platform import sys @@ -25,7 +26,7 @@ def __init__( self, progname, path, - quiet, + diagnostic_level, useValgrind, valgrindLeakCheck, valgrindArgs, @@ -46,7 +47,7 @@ def __init__( self.progname = progname # The items to add to the PATH environment variable. self.path = [str(p) for p in path] - self.quiet = bool(quiet) + self.diagnostic_level = diagnostic_level self.useValgrind = bool(useValgrind) self.valgrindLeakCheck = bool(valgrindLeakCheck) self.valgrindUserArgs = list(valgrindArgs) @@ -155,8 +156,7 @@ def per_test_coverage(self, value): def load_config(self, config, path): """load_config(config, path) - Load a config object from an alternate path.""" - if self.debug: - self.note("load_config from %r" % path) + self.dbg("load_config from %r" % path) config.load_from_path(path, self) return config @@ -209,6 +209,8 @@ def getToolsPath(self, dir, paths, tools): return dir def _write_message(self, kind, message): + if not self.diagnostic_level_enabled(kind): + return # Get the file/line where this message was generated. 
f = inspect.currentframe() # Step out of _write_message, and then out of wrapper. @@ -234,13 +236,21 @@ def substitute(self, string): "unable to find %r parameter, use '--param=%s=VALUE'" % (key, key) ) + def diagnostic_level_enabled(self, kind): + if kind == "debug": + return self.debug + return DiagnosticLevel.create(self.diagnostic_level) >= DiagnosticLevel.create( + kind + ) + + def dbg(self, message): + self._write_message("debug", message) + def note(self, message): - if not self.quiet: - self._write_message("note", message) + self._write_message("note", message) def warning(self, message): - if not self.quiet: - self._write_message("warning", message) + self._write_message("warning", message) self.numWarnings += 1 def error(self, message): @@ -250,3 +260,25 @@ def error(self, message): def fatal(self, message): self._write_message("fatal", message) sys.exit(2) + + +@enum.unique +class DiagnosticLevel(enum.IntEnum): + FATAL = 0 + ERROR = 1 + WARNING = 2 + NOTE = 3 + + @classmethod + def create(cls, value): + if value == "fatal": + return cls.FATAL + if value == "error": + return cls.ERROR + if value == "warning": + return cls.WARNING + if value == "note": + return cls.NOTE + raise ValueError( + f"invalid diagnostic level {repr(value)} of type {type(value)}" + ) diff --git a/llvm/utils/lit/lit/LitTestCase.py b/llvm/utils/lit/lit/LitTestCase.py index 566d068ad11ea..690b7cb6f13d5 100644 --- a/llvm/utils/lit/lit/LitTestCase.py +++ b/llvm/utils/lit/lit/LitTestCase.py @@ -46,7 +46,7 @@ def load_test_suite(inputs): lit_config = lit.LitConfig.LitConfig( progname="lit", path=[], - quiet=False, + diagnostic_level="note", useValgrind=False, valgrindLeakCheck=False, valgrindArgs=[], diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index c250838250547..e7e545cc8e300 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -143,8 +143,7 @@ def load_from_path(self, path, litConfig): 
cfg_globals["__file__"] = path try: exec(compile(data, path, "exec"), cfg_globals, None) - if litConfig.debug: - litConfig.note("... loaded config %r" % path) + litConfig.dbg("... loaded config %r" % path) except SystemExit: e = sys.exc_info()[1] # We allow normal system exit inside a config file to just diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py index 8238bc42395af..5c2ff4e70a3aa 100644 --- a/llvm/utils/lit/lit/cl_arguments.py +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -15,6 +15,59 @@ class TestOrder(enum.Enum): SMART = "smart" +@enum.unique +class TestOutputLevel(enum.IntEnum): + OFF = 0 + FAILED = 1 + ALL = 2 + + @classmethod + def create(cls, value): + if value == "off": + return cls.OFF + if value == "failed": + return cls.FAILED + if value == "all": + return cls.ALL + raise ValueError(f"invalid output level {repr(value)} of type {type(value)}") + + +class TestOutputAction(argparse.Action): + def __init__(self, option_strings, dest, **kwargs): + super().__init__(option_strings, dest, nargs=None, **kwargs) + + def __call__(self, parser, namespace, value, option_string=None): + TestOutputAction.setOutputLevel(namespace, self.dest, value) + + @classmethod + def setOutputLevel(cls, namespace, dest, value): + setattr(namespace, dest, value) + if dest == "test_output" and TestOutputLevel.create( + namespace.print_result_after + ) < TestOutputLevel.create(value): + setattr(namespace, "print_result_after", value) + elif dest == "print_result_after" and TestOutputLevel.create( + namespace.test_output + ) > TestOutputLevel.create(value): + setattr(namespace, "test_output", value) + + +class AliasAction(argparse.Action): + def __init__(self, option_strings, dest, nargs=None, **kwargs): + self.expansion = kwargs.pop("alias", None) + if not self.expansion: + raise ValueError("no aliases expansion provided") + super().__init__(option_strings, dest, nargs=0, **kwargs) + + def __call__(self, parser, namespace, value, 
option_string=None): + for e in self.expansion: + if callable(e): + e(namespace) + else: + dest, val = e + setattr(namespace, dest, val) + + def parse_args(): parser = argparse.ArgumentParser(prog="lit", fromfile_prefix_chars="@") parser.add_argument( @@ -55,41 +108,103 @@ def parse_args(): ) format_group = parser.add_argument_group("Output Format") - # FIXME: I find these names very confusing, although I like the - # functionality. format_group.add_argument( - "-q", "--quiet", help="Suppress no error output", action="store_true" + "--test-output", + help="Control whether the executed commands and their outputs are printed after each test has executed (default off). " + "If --print-result-after is set lower than the level given to --test-output, --print-result-after is raised to match.", + choices=["off", "failed", "all"], + default="off", + action=TestOutputAction, + ) + format_group.add_argument( + "--print-result-after", + help="Control which the executed test names and results are printed after each test has executed (default all). 
" + "If --test-output is set higher than the level given to --print-result-after, --test-output is lowered to match.", + choices=["off", "failed", "all"], + default="all", + action=TestOutputAction, + ) + format_group.add_argument( + "--diagnostic-level", + help="Control how verbose lit diagnostics should be (default note)", + choices=["error", "warning", "note"], + default="note", + ) + format_group.add_argument( + "--terse-summary", + help="Print the elapsed time and the number of passed tests after all tests have finished (default on)", + action="store_true", + dest="terse_summary", + ) + format_group.add_argument( + "--no-terse-summary", + help="Don't show the elapsed time after all tests have finished, and only show the number of failed tests.", + action="store_false", + dest="terse_summary", + ) + parser.set_defaults(terse_summary=False) + format_group.add_argument( + "-q", + "--quiet", + help="Alias for '--diagnostic-level=error --test-output=off --terse-summary'", + action=AliasAction, + alias=[ + lambda namespace: TestOutputAction.setOutputLevel( + namespace, "print_result_after", "failed" + ), + lambda namespace: TestOutputAction.setOutputLevel( + namespace, "test_output", "off" + ), + ("diagnostic_level", "error"), + ("terse_summary", True), + ], ) format_group.add_argument( "-s", "--succinct", - help="Reduce amount of output." - " Additionally, show a progress bar," - " unless --no-progress-bar is specified.", - action="store_true", + help="Alias for '--progress-bar --print-result-after=failed'", + action=AliasAction, + alias=[ + ("useProgressBar", True), + lambda namespace: TestOutputAction.setOutputLevel( + namespace, "print_result_after", "failed" + ), + ], ) format_group.add_argument( "-v", "--verbose", - dest="showOutput", help="For failed tests, show all output. For example, each command is" " printed before it is executed, so the last printed command is the one" - " that failed.", - action="store_true", + " that failed. 
Alias for '--test-output=failed'", + action=AliasAction, + alias=[ + lambda namespace: TestOutputAction.setOutputLevel( + namespace, "test_output", "failed" + ), + ], ) format_group.add_argument( "-vv", "--echo-all-commands", - dest="showOutput", help="Deprecated alias for -v.", - action="store_true", + action=AliasAction, + alias=[ + lambda namespace: TestOutputAction.setOutputLevel( + namespace, "test_output", "failed" + ), + ], ) format_group.add_argument( "-a", "--show-all", - dest="showAllOutput", - help="Enable -v, but for all tests not just failed tests.", - action="store_true", + help="Enable -v, but for all tests not just failed tests. Alias for '--test-output=all'", + action=AliasAction, + alias=[ + lambda namespace: TestOutputAction.setOutputLevel( + namespace, "test_output", "all" + ), + ], ) format_group.add_argument( "-r", @@ -105,10 +220,16 @@ def parse_args(): help="Write test results to the provided path", metavar="PATH", ) + format_group.add_argument( + "--progress-bar", + dest="useProgressBar", + help="Show curses based progress bar", + action="store_true", + ) format_group.add_argument( "--no-progress-bar", dest="useProgressBar", - help="Do not use curses based progress bar", + help="Do not use curses based progress bar (default)", action="store_false", ) diff --git a/llvm/utils/lit/lit/discovery.py b/llvm/utils/lit/lit/discovery.py index 2e7f90c6bb0c9..ac06223b45345 100644 --- a/llvm/utils/lit/lit/discovery.py +++ b/llvm/utils/lit/lit/discovery.py @@ -62,8 +62,7 @@ def search1(path): cfgpath = target # We found a test suite, create a new config for it and load it. - if litConfig.debug: - litConfig.note("loading suite config %r" % cfgpath) + litConfig.dbg("loading suite config %r" % cfgpath) cfg = TestingConfig.fromdefaults(litConfig) cfg.load_from_path(cfgpath, litConfig) @@ -115,8 +114,7 @@ def search1(path_in_suite): # Otherwise, copy the current config and load the local configuration # file into it. 
config = copy.deepcopy(parent) - if litConfig.debug: - litConfig.note("loading local config %r" % cfgpath) + litConfig.dbg("loading local config %r" % cfgpath) config.load_from_path(cfgpath, litConfig) return config @@ -137,8 +135,7 @@ def getTests(path, litConfig, testSuiteCache, localConfigCache): litConfig.warning("unable to find test suite for %r" % path) return (), () - if litConfig.debug: - litConfig.note("resolved input %r to %r::%r" % (path, ts.name, path_in_suite)) + litConfig.dbg("resolved input %r to %r::%r" % (path, ts.name, path_in_suite)) return ts, getTestsInSuite( ts, diff --git a/llvm/utils/lit/lit/display.py b/llvm/utils/lit/lit/display.py index b565bbc7a4f93..4dc04d93d3ea7 100644 --- a/llvm/utils/lit/lit/display.py +++ b/llvm/utils/lit/lit/display.py @@ -2,7 +2,7 @@ def create_display(opts, tests, total_tests, workers): - if opts.quiet: + if opts.print_result_after == "off" and not opts.useProgressBar: return NopDisplay() num_tests = len(tests) @@ -10,7 +10,7 @@ def create_display(opts, tests, total_tests, workers): header = "-- Testing: %d%s tests, %d workers --" % (num_tests, of_total, workers) progress_bar = None - if opts.succinct and opts.useProgressBar: + if opts.useProgressBar: import lit.ProgressBar try: @@ -96,8 +96,8 @@ def update(self, test): show_result = ( test.isFailure() - or self.opts.showAllOutput - or (not self.opts.quiet and not self.opts.succinct) + and self.opts.print_result_after == "failed" + or self.opts.print_result_after == "all" ) if show_result: if self.progress_bar: @@ -134,7 +134,9 @@ def print_result(self, test): ) # Show the test failure output, if requested. 
- if (test.isFailure() and self.opts.showOutput) or self.opts.showAllOutput: + if ( + test.isFailure() and self.opts.test_output == "failed" + ) or self.opts.test_output == "all": if test.isFailure(): print("%s TEST '%s' FAILED %s" % ("*" * 20, test_name, "*" * 20)) out = test.result.output diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 913ba69d63328..59982c94b787c 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -53,7 +53,10 @@ def __init__(self, lit_config, config): self.use_lit_shell = True global lit_path_displayed - if not self.lit_config.quiet and lit_path_displayed is False: + if ( + self.lit_config.diagnostic_level_enabled("note") + and lit_path_displayed is False + ): self.lit_config.note("using lit tools: {}".format(path)) lit_path_displayed = True @@ -527,7 +530,7 @@ def use_llvm_tool( if tool: tool = os.path.normpath(tool) - if not self.lit_config.quiet and not quiet: + if not quiet: self.lit_config.note("using {}: {}".format(name, tool)) return tool @@ -637,10 +640,9 @@ def clang_setup( ("%ms_abi_triple", self.make_msabi_triple(self.config.target_triple)) ) else: - if not self.lit_config.quiet: - self.lit_config.note( - "No default target triple was found, some tests may fail as a result." - ) + self.lit_config.note( + "No default target triple was found, some tests may fail as a result." 
+ ) self.config.substitutions.append(("%itanium_abi_triple", "")) self.config.substitutions.append(("%ms_abi_triple", "")) diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index a585cc0abdd48..07e809b168dc2 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -30,7 +30,7 @@ def main(builtin_params={}): lit_config = lit.LitConfig.LitConfig( progname=os.path.basename(sys.argv[0]), path=opts.path, - quiet=opts.quiet, + diagnostic_level=opts.diagnostic_level, useValgrind=opts.useValgrind, valgrindLeakCheck=opts.valgrindLeakCheck, valgrindArgs=opts.valgrindArgs, @@ -332,7 +332,7 @@ def print_results(tests, elapsed, opts): opts.printPathRelativeCWD, ) - print_summary(total_tests, tests_by_code, opts.quiet, elapsed) + print_summary(total_tests, tests_by_code, opts.terse_summary, elapsed) def print_group(tests, code, shown_codes, printPathRelativeCWD): diff --git a/llvm/utils/lit/tests/Inputs/verbosity/fail.txt b/llvm/utils/lit/tests/Inputs/verbosity/fail.txt new file mode 100644 index 0000000000000..2bcca02683614 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/verbosity/fail.txt @@ -0,0 +1,2 @@ +RUN: echo "fail test output" +RUN: fail \ No newline at end of file diff --git a/llvm/utils/lit/tests/Inputs/verbosity/lit.cfg b/llvm/utils/lit/tests/Inputs/verbosity/lit.cfg new file mode 100644 index 0000000000000..c3a1f4f4d873a --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/verbosity/lit.cfg @@ -0,0 +1,11 @@ +import lit.formats + +config.name = "verbosity" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest() +config.test_source_root = None +config.test_exec_root = None + +lit_config.dbg("this is a debug log") +lit_config.note("this is a note") +lit_config.warning("this is a warning") diff --git a/llvm/utils/lit/tests/Inputs/verbosity/pass.txt b/llvm/utils/lit/tests/Inputs/verbosity/pass.txt new file mode 100644 index 0000000000000..f64843827e147 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/verbosity/pass.txt @@ 
-0,0 +1 @@ +RUN: echo "pass test output" \ No newline at end of file diff --git a/llvm/utils/lit/tests/Inputs/verbosity/unsupported.txt b/llvm/utils/lit/tests/Inputs/verbosity/unsupported.txt new file mode 100644 index 0000000000000..f5ebd4da178f8 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/verbosity/unsupported.txt @@ -0,0 +1,2 @@ +REQUIRES: asdf +RUN: not echo "unsupported test output" diff --git a/llvm/utils/lit/tests/Inputs/verbosity/xfail.txt b/llvm/utils/lit/tests/Inputs/verbosity/xfail.txt new file mode 100644 index 0000000000000..85001cc22b08e --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/verbosity/xfail.txt @@ -0,0 +1,2 @@ +XFAIL: * +RUN: not echo "xfail test output" \ No newline at end of file diff --git a/llvm/utils/lit/tests/Inputs/verbosity/xpass.txt b/llvm/utils/lit/tests/Inputs/verbosity/xpass.txt new file mode 100644 index 0000000000000..87c95ec75ecdc --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/verbosity/xpass.txt @@ -0,0 +1,2 @@ +XFAIL: * +RUN: echo "xpass test output" diff --git a/llvm/utils/lit/tests/lit-opts.py b/llvm/utils/lit/tests/lit-opts.py index a533a59d9d124..0759c1d17be58 100644 --- a/llvm/utils/lit/tests/lit-opts.py +++ b/llvm/utils/lit/tests/lit-opts.py @@ -12,13 +12,13 @@ # Check that LIT_OPTS understands multiple options with arbitrary spacing. # -# RUN: env LIT_OPTS='-a -v -Dvar=foobar' \ +# RUN: env LIT_OPTS='-v -a -Dvar=foobar' \ # RUN: %{lit} -s %{inputs}/lit-opts \ # RUN: | FileCheck -check-prefix=SHOW-ALL -DVAR=foobar %s # Check that LIT_OPTS parses shell-like quotes and escapes. 
# -# RUN: env LIT_OPTS='-a -v -Dvar="foo bar"\ baz' \ +# RUN: env LIT_OPTS='-v -a -Dvar="foo bar"\ baz' \ # RUN: %{lit} -s %{inputs}/lit-opts \ # RUN: | FileCheck -check-prefix=SHOW-ALL -DVAR="foo bar baz" %s diff --git a/llvm/utils/lit/tests/per-test-coverage-by-lit-cfg.py b/llvm/utils/lit/tests/per-test-coverage-by-lit-cfg.py index 189c1cebd623b..b3af606c52f18 100644 --- a/llvm/utils/lit/tests/per-test-coverage-by-lit-cfg.py +++ b/llvm/utils/lit/tests/per-test-coverage-by-lit-cfg.py @@ -1,10 +1,10 @@ # Test if lit_config.per_test_coverage in lit.cfg sets individual test case coverage. -# RUN: %{lit} -a -vv -Dexecute_external=False \ +# RUN: %{lit} -a -Dexecute_external=False \ # RUN: %{inputs}/per-test-coverage-by-lit-cfg/per-test-coverage-by-lit-cfg.py | \ # RUN: FileCheck -DOUT=stdout %s -# RUN: %{lit} -a -vv -Dexecute_external=True \ +# RUN: %{lit} -a -Dexecute_external=True \ # RUN: %{inputs}/per-test-coverage-by-lit-cfg/per-test-coverage-by-lit-cfg.py | \ # RUN: FileCheck -DOUT=stderr %s diff --git a/llvm/utils/lit/tests/per-test-coverage.py b/llvm/utils/lit/tests/per-test-coverage.py index cf5e82c44dc51..ba513554ae76e 100644 --- a/llvm/utils/lit/tests/per-test-coverage.py +++ b/llvm/utils/lit/tests/per-test-coverage.py @@ -1,10 +1,10 @@ # Test LLVM_PROFILE_FILE is set when --per-test-coverage is passed to command line. 
-# RUN: %{lit} -a -vv --per-test-coverage -Dexecute_external=False \ +# RUN: %{lit} -a --per-test-coverage -Dexecute_external=False \ # RUN: %{inputs}/per-test-coverage/per-test-coverage.py | \ # RUN: FileCheck -DOUT=stdout %s -# RUN: %{lit} -a -vv --per-test-coverage -Dexecute_external=True \ +# RUN: %{lit} -a --per-test-coverage -Dexecute_external=True \ # RUN: %{inputs}/per-test-coverage/per-test-coverage.py | \ # RUN: FileCheck -DOUT=stderr %s diff --git a/llvm/utils/lit/tests/shtest-cat.py b/llvm/utils/lit/tests/shtest-cat.py index 5efe25c41684a..9763f9fbf1a9d 100644 --- a/llvm/utils/lit/tests/shtest-cat.py +++ b/llvm/utils/lit/tests/shtest-cat.py @@ -1,6 +1,6 @@ ## Test the cat command. # -# RUN: not %{lit} -a -v %{inputs}/shtest-cat \ +# RUN: not %{lit} -v %{inputs}/shtest-cat \ # RUN: | FileCheck -match-full-lines %s # END. diff --git a/llvm/utils/lit/tests/shtest-env-negative.py b/llvm/utils/lit/tests/shtest-env-negative.py index c8b59b224e7c4..236c6a19e694b 100644 --- a/llvm/utils/lit/tests/shtest-env-negative.py +++ b/llvm/utils/lit/tests/shtest-env-negative.py @@ -1,6 +1,6 @@ ## Test the env command (failing tests). -# RUN: not %{lit} -a -v %{inputs}/shtest-env-negative \ +# RUN: not %{lit} -v %{inputs}/shtest-env-negative \ # RUN: | FileCheck -match-full-lines %s # # END. diff --git a/llvm/utils/lit/tests/shtest-env-path.py b/llvm/utils/lit/tests/shtest-env-path.py index bf459ae53fbc0..7f04756ed6ad5 100644 --- a/llvm/utils/lit/tests/shtest-env-path.py +++ b/llvm/utils/lit/tests/shtest-env-path.py @@ -1,9 +1,9 @@ ## Tests env command for setting the PATH variable. # The test is using /bin/sh. Limit to system known to have /bin/sh. -# REQUIRES: system-linux +# REQUIRES: system-linux || system-darwin -# RUN: %{lit} -a -v %{inputs}/shtest-env-path/path.txt \ +# RUN: %{lit} -a %{inputs}/shtest-env-path/path.txt \ # RUN: | FileCheck -match-full-lines %s # # END. 
diff --git a/llvm/utils/lit/tests/shtest-env-positive.py b/llvm/utils/lit/tests/shtest-env-positive.py index 4f07b69ecc7d3..089acd308c5c5 100644 --- a/llvm/utils/lit/tests/shtest-env-positive.py +++ b/llvm/utils/lit/tests/shtest-env-positive.py @@ -1,6 +1,6 @@ ## Test the env command (passing tests). -# RUN: %{lit} -a -v %{inputs}/shtest-env-positive \ +# RUN: %{lit} -a %{inputs}/shtest-env-positive \ # RUN: | FileCheck -match-full-lines %s # # END. diff --git a/llvm/utils/lit/tests/shtest-export.py b/llvm/utils/lit/tests/shtest-export.py index f2de8e8cd8b5f..d45a94a5eb830 100644 --- a/llvm/utils/lit/tests/shtest-export.py +++ b/llvm/utils/lit/tests/shtest-export.py @@ -1,6 +1,6 @@ ## Test the export command. -# RUN: not %{lit} -a -v %{inputs}/shtest-export \ +# RUN: not %{lit} -v %{inputs}/shtest-export \ # RUN: | FileCheck -match-full-lines %s # # END. diff --git a/llvm/utils/lit/tests/shtest-glob.py b/llvm/utils/lit/tests/shtest-glob.py index aa4705b634a7d..ba609e036c166 100644 --- a/llvm/utils/lit/tests/shtest-glob.py +++ b/llvm/utils/lit/tests/shtest-glob.py @@ -1,6 +1,6 @@ ## Tests glob pattern handling in echo command. -# RUN: not %{lit} -a -v %{inputs}/shtest-glob \ +# RUN: not %{lit} -v %{inputs}/shtest-glob \ # RUN: | FileCheck -dump-input=fail -match-full-lines --implicit-check-not=Error: %s # END. diff --git a/llvm/utils/lit/tests/shtest-not.py b/llvm/utils/lit/tests/shtest-not.py index b42769ffd9383..e735d38260b37 100644 --- a/llvm/utils/lit/tests/shtest-not.py +++ b/llvm/utils/lit/tests/shtest-not.py @@ -1,6 +1,6 @@ # Check the not command -# RUN: not %{lit} -a -v %{inputs}/shtest-not \ +# RUN: not %{lit} -a %{inputs}/shtest-not \ # RUN: | FileCheck -match-full-lines %s # # END. 
diff --git a/llvm/utils/lit/tests/shtest-pushd-popd.py b/llvm/utils/lit/tests/shtest-pushd-popd.py index f917c1a4a4599..799e9d6d65951 100644 --- a/llvm/utils/lit/tests/shtest-pushd-popd.py +++ b/llvm/utils/lit/tests/shtest-pushd-popd.py @@ -1,6 +1,6 @@ # Check the pushd and popd commands -# RUN: not %{lit} -a -v %{inputs}/shtest-pushd-popd \ +# RUN: not %{lit} -v %{inputs}/shtest-pushd-popd \ # RUN: | FileCheck -match-full-lines %s # # END. diff --git a/llvm/utils/lit/tests/shtest-readfile-external.py b/llvm/utils/lit/tests/shtest-readfile-external.py index 6fe1088efd674..0d8e3ad1242bf 100644 --- a/llvm/utils/lit/tests/shtest-readfile-external.py +++ b/llvm/utils/lit/tests/shtest-readfile-external.py @@ -4,7 +4,7 @@ # ALLOW_RETRIES: 2 # UNSUPPORTED: system-windows -# RUN: env LIT_USE_INTERNAL_SHELL=0 not %{lit} -a -v %{inputs}/shtest-readfile | FileCheck -match-full-lines -DTEMP_PATH=%S/Inputs/shtest-readfile/Output %s +# RUN: env LIT_USE_INTERNAL_SHELL=0 not %{lit} -v %{inputs}/shtest-readfile | FileCheck -match-full-lines -DTEMP_PATH=%S/Inputs/shtest-readfile/Output %s # CHECK: -- Testing: 5 tests{{.*}} diff --git a/llvm/utils/lit/tests/shtest-readfile.py b/llvm/utils/lit/tests/shtest-readfile.py index 218da2257bcff..ca57db82e6617 100644 --- a/llvm/utils/lit/tests/shtest-readfile.py +++ b/llvm/utils/lit/tests/shtest-readfile.py @@ -3,7 +3,7 @@ # TODO(boomanaiden154): This sometimes fails, possibly due to buffers not being flushed. 
# ALLOW_RETRIES: 2 -# RUN: env LIT_USE_INTERNAL_SHELL=1 not %{lit} -a -v %{inputs}/shtest-readfile | FileCheck -match-full-lines -DTEMP_PATH=%S%{fs-sep}Inputs%{fs-sep}shtest-readfile%{fs-sep}Output %s +# RUN: env LIT_USE_INTERNAL_SHELL=1 not %{lit} -v %{inputs}/shtest-readfile | FileCheck -match-full-lines -DTEMP_PATH=%S%{fs-sep}Inputs%{fs-sep}shtest-readfile%{fs-sep}Output %s # CHECK: -- Testing: 5 tests{{.*}} diff --git a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py index d81cde0159792..d5340a7d2efb9 100644 --- a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py +++ b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py @@ -4,7 +4,7 @@ # These tests are specific to options that Darwin does not support. # UNSUPPORTED: system-windows, system-cygwin, system-darwin, system-aix, system-solaris -# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit-nondarwin | FileCheck %s +# RUN: not %{lit} -v %{inputs}/shtest-ulimit-nondarwin | FileCheck %s # CHECK: -- Testing: 2 tests{{.*}} diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index 21e5a5e2491d1..582477bef65fc 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -8,7 +8,7 @@ # RUN: %{python} %S/Inputs/shtest-ulimit/print_limits.py | grep RLIMIT_NOFILE \ # RUN: | sed -n -e 's/.*=//p' | tr -d '\n' > %t.nofile_limit -# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit --order=lexical \ +# RUN: not %{lit} -v %{inputs}/shtest-ulimit --order=lexical \ # RUN: | FileCheck -DBASE_NOFILE_LIMIT=%{readfile:%t.nofile_limit} %s # CHECK: -- Testing: 3 tests{{.*}} diff --git a/llvm/utils/lit/tests/shtest-umask.py b/llvm/utils/lit/tests/shtest-umask.py index e67f0308db661..8af81ec3b4ebd 100644 --- a/llvm/utils/lit/tests/shtest-umask.py +++ b/llvm/utils/lit/tests/shtest-umask.py @@ -1,6 +1,6 @@ # Check the umask command -# RUN: not %{lit} -a -v %{inputs}/shtest-umask | FileCheck -match-full-lines %s +# RUN: not 
%{lit} -v %{inputs}/shtest-umask | FileCheck -match-full-lines %s # TODO(boomanaiden154): We should be asserting that we get expected behavior # on Windows rather than just listing this as unsupported. # UNSUPPORTED: system-windows diff --git a/llvm/utils/lit/tests/unit/TestRunner.py b/llvm/utils/lit/tests/unit/TestRunner.py index 09470c7b9386e..a3fa62e1ef0e1 100644 --- a/llvm/utils/lit/tests/unit/TestRunner.py +++ b/llvm/utils/lit/tests/unit/TestRunner.py @@ -30,7 +30,7 @@ def load_keyword_parser_lit_tests(): lit_config = lit.LitConfig.LitConfig( progname="lit", path=[], - quiet=False, + diagnostic_level="note", useValgrind=False, valgrindLeakCheck=False, valgrindArgs=[], diff --git a/llvm/utils/lit/tests/verbosity.py b/llvm/utils/lit/tests/verbosity.py new file mode 100644 index 0000000000000..62baf618e2aca --- /dev/null +++ b/llvm/utils/lit/tests/verbosity.py @@ -0,0 +1,1130 @@ +# Test various combinations of options controlling lit stdout and stderr output + +# RUN: mkdir -p %t + +### Test default + +# RUN: not %{lit} %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# NO-ARGS: -- Testing: 5 tests, 1 workers -- +# NO-ARGS-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# NO-ARGS-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# NO-ARGS-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# NO-ARGS-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# NO-ARGS-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# NO-ARGS-NEXT: ******************** +# NO-ARGS-NEXT: Failed Tests (1): +# NO-ARGS-NEXT: verbosity :: fail.txt +# NO-ARGS-EMPTY: +# NO-ARGS-NEXT: ******************** +# NO-ARGS-NEXT: Unexpectedly Passed Tests (1): +# NO-ARGS-NEXT: verbosity :: xpass.txt +# NO-ARGS-EMPTY: +# NO-ARGS-EMPTY: +# NO-ARGS-NEXT: Testing Time: {{.*}}s +# NO-ARGS-EMPTY: +# NO-ARGS-NEXT: Total Discovered Tests: 5 +# NO-ARGS-NEXT: 
Unsupported : 1 (20.00%) +# NO-ARGS-NEXT: Passed : 1 (20.00%) +# NO-ARGS-NEXT: Expectedly Failed : 1 (20.00%) +# NO-ARGS-NEXT: Failed : 1 (20.00%) +# NO-ARGS-NEXT: Unexpectedly Passed: 1 (20.00%) + +# NO-ARGS-ERR: lit.py: {{.*}}lit.cfg:{{[0-9]+}}: note: this is a note +# NO-ARGS-ERR-NEXT: lit.py: {{.*}}lit.cfg:{{[0-9]+}}: warning: this is a warning +# NO-ARGS-ERR-EMPTY: +# NO-ARGS-ERR-NEXT: 1 warning(s) in tests + + +### Test aliases + +# RUN: not %{lit} --succinct %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SUCCINCT < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# SUCCINCT: -- Testing: 5 tests, 1 workers -- +# SUCCINCT-NEXT: Testing: +# SUCCINCT-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# SUCCINCT-NEXT: Testing: +# SUCCINCT-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# SUCCINCT-NEXT: Testing: +# SUCCINCT-NEXT: ******************** +# SUCCINCT-NEXT: Failed Tests (1): +# SUCCINCT-NEXT: verbosity :: fail.txt +# SUCCINCT-EMPTY: +# SUCCINCT-NEXT: ******************** +# SUCCINCT-NEXT: Unexpectedly Passed Tests (1): +# SUCCINCT-NEXT: verbosity :: xpass.txt +# SUCCINCT-EMPTY: +# SUCCINCT-EMPTY: +# SUCCINCT-NEXT: Testing Time: {{.*}}s +# SUCCINCT-EMPTY: +# SUCCINCT-NEXT: Total Discovered Tests: 5 +# SUCCINCT-NEXT: Unsupported : 1 (20.00%) +# SUCCINCT-NEXT: Passed : 1 (20.00%) +# SUCCINCT-NEXT: Expectedly Failed : 1 (20.00%) +# SUCCINCT-NEXT: Failed : 1 (20.00%) +# SUCCINCT-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --verbose %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix VERBOSE < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# VERBOSE: -- Testing: 5 tests, 1 workers -- +# VERBOSE-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# VERBOSE-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# VERBOSE-NEXT: Exit Code: 127 
+# VERBOSE-EMPTY: +# VERBOSE-NEXT: Command Output (stdout): +# VERBOSE-NEXT: -- +# VERBOSE-NEXT: # {{R}}UN: at line 1 +# VERBOSE-NEXT: echo "fail test output" +# VERBOSE-NEXT: # executed command: echo 'fail test output' +# VERBOSE-NEXT: # .---command stdout------------ +# VERBOSE-NEXT: # | fail test output +# VERBOSE-NEXT: # `----------------------------- +# VERBOSE-NEXT: # {{R}}UN: at line 2 +# VERBOSE-NEXT: fail +# VERBOSE-NEXT: # executed command: fail +# VERBOSE-NEXT: # .---command stderr------------ +# VERBOSE-NEXT: # | 'fail': command not found +# VERBOSE-NEXT: # `----------------------------- +# VERBOSE-NEXT: # error: command failed with exit status: 127 +# VERBOSE-EMPTY: +# VERBOSE-NEXT: -- +# VERBOSE-EMPTY: +# VERBOSE-NEXT: ******************** +# VERBOSE-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# VERBOSE-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# VERBOSE-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# VERBOSE-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# VERBOSE-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# VERBOSE-NEXT: Exit Code: 0 +# VERBOSE-EMPTY: +# VERBOSE-NEXT: Command Output (stdout): +# VERBOSE-NEXT: -- +# VERBOSE-NEXT: # {{R}}UN: at line 2 +# VERBOSE-NEXT: echo "xpass test output" +# VERBOSE-NEXT: # executed command: echo 'xpass test output' +# VERBOSE-NEXT: # .---command stdout------------ +# VERBOSE-NEXT: # | xpass test output +# VERBOSE-NEXT: # `----------------------------- +# VERBOSE-EMPTY: +# VERBOSE-NEXT: -- +# VERBOSE-EMPTY: +# VERBOSE-NEXT: ******************** +# VERBOSE-NEXT: ******************** +# VERBOSE-NEXT: Failed Tests (1): +# VERBOSE-NEXT: verbosity :: fail.txt +# VERBOSE-EMPTY: +# VERBOSE-NEXT: ******************** +# VERBOSE-NEXT: Unexpectedly Passed Tests (1): +# VERBOSE-NEXT: verbosity :: xpass.txt +# VERBOSE-EMPTY: +# VERBOSE-EMPTY: +# VERBOSE-NEXT: Testing Time: {{.*}}s +# VERBOSE-EMPTY: +# VERBOSE-NEXT: Total Discovered Tests: 5 +# VERBOSE-NEXT: 
Unsupported : 1 (20.00%) +# VERBOSE-NEXT: Passed : 1 (20.00%) +# VERBOSE-NEXT: Expectedly Failed : 1 (20.00%) +# VERBOSE-NEXT: Failed : 1 (20.00%) +# VERBOSE-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --show-all %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SHOW-ALL < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# SHOW-ALL: -- Testing: 5 tests, 1 workers -- +# SHOW-ALL-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# SHOW-ALL-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# SHOW-ALL-NEXT: Exit Code: 127 +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: Command Output (stdout): +# SHOW-ALL-NEXT: -- +# SHOW-ALL-NEXT: # {{R}}UN: at line 1 +# SHOW-ALL-NEXT: echo "fail test output" +# SHOW-ALL-NEXT: # executed command: echo 'fail test output' +# SHOW-ALL-NEXT: # .---command stdout------------ +# SHOW-ALL-NEXT: # | fail test output +# SHOW-ALL-NEXT: # `----------------------------- +# SHOW-ALL-NEXT: # {{R}}UN: at line 2 +# SHOW-ALL-NEXT: fail +# SHOW-ALL-NEXT: # executed command: fail +# SHOW-ALL-NEXT: # .---command stderr------------ +# SHOW-ALL-NEXT: # | 'fail': command not found +# SHOW-ALL-NEXT: # `----------------------------- +# SHOW-ALL-NEXT: # error: command failed with exit status: 127 +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: -- +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: ******************** +# SHOW-ALL-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# SHOW-ALL-NEXT: Exit Code: 0 +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: Command Output (stdout): +# SHOW-ALL-NEXT: -- +# SHOW-ALL-NEXT: # {{R}}UN: at line 1 +# SHOW-ALL-NEXT: echo "pass test output" +# SHOW-ALL-NEXT: # executed command: echo 'pass test output' +# SHOW-ALL-NEXT: # .---command stdout------------ +# SHOW-ALL-NEXT: # | pass test output +# SHOW-ALL-NEXT: # `----------------------------- +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: -- +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: ******************** 
+# SHOW-ALL-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# SHOW-ALL-NEXT: Test requires the following unavailable features: asdf +# SHOW-ALL-NEXT: ******************** +# SHOW-ALL-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# SHOW-ALL-NEXT: Exit Code: 1 +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: Command Output (stdout): +# SHOW-ALL-NEXT: -- +# SHOW-ALL-NEXT: # {{R}}UN: at line 2 +# SHOW-ALL-NEXT: not echo "xfail test output" +# SHOW-ALL-NEXT: # executed command: not echo 'xfail test output' +# SHOW-ALL-NEXT: # .---command stdout------------ +# SHOW-ALL-NEXT: # | xfail test output +# SHOW-ALL-NEXT: # `----------------------------- +# SHOW-ALL-NEXT: # error: command failed with exit status: 1 +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: -- +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: ******************** +# SHOW-ALL-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# SHOW-ALL-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# SHOW-ALL-NEXT: Exit Code: 0 +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: Command Output (stdout): +# SHOW-ALL-NEXT: -- +# SHOW-ALL-NEXT: # {{R}}UN: at line 2 +# SHOW-ALL-NEXT: echo "xpass test output" +# SHOW-ALL-NEXT: # executed command: echo 'xpass test output' +# SHOW-ALL-NEXT: # .---command stdout------------ +# SHOW-ALL-NEXT: # | xpass test output +# SHOW-ALL-NEXT: # `----------------------------- +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: -- +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: ******************** +# SHOW-ALL-NEXT: ******************** +# SHOW-ALL-NEXT: Failed Tests (1): +# SHOW-ALL-NEXT: verbosity :: fail.txt +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: ******************** +# SHOW-ALL-NEXT: Unexpectedly Passed Tests (1): +# SHOW-ALL-NEXT: verbosity :: xpass.txt +# SHOW-ALL-EMPTY: +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: Testing Time: {{.*}}s +# SHOW-ALL-EMPTY: +# SHOW-ALL-NEXT: Total Discovered Tests: 5 +# SHOW-ALL-NEXT: Unsupported : 1 (20.00%) +# SHOW-ALL-NEXT: Passed : 1 (20.00%) +# SHOW-ALL-NEXT: Expectedly Failed : 1 
(20.00%) +# SHOW-ALL-NEXT: Failed : 1 (20.00%) +# SHOW-ALL-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --quiet %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET < %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-ERR --implicit-check-not lit < %t/stderr.txt + +# QUIET: -- Testing: 5 tests, 1 workers -- +# QUIET-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# QUIET-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# QUIET-NEXT: ******************** +# QUIET-NEXT: Failed Tests (1): +# QUIET-NEXT: verbosity :: fail.txt +# QUIET-EMPTY: +# QUIET-NEXT: ******************** +# QUIET-NEXT: Unexpectedly Passed Tests (1): +# QUIET-NEXT: verbosity :: xpass.txt +# QUIET-EMPTY: +# QUIET-EMPTY: +# QUIET-NEXT: Total Discovered Tests: 5 +# QUIET-NEXT: Failed : 1 (20.00%) +# QUIET-NEXT: Unexpectedly Passed: 1 (20.00%) + +# QUIET-ERR: 1 warning(s) in tests + + +### Test log output + +# RUN: not %{lit} --debug %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix DEBUG < %t/stdout.txt +# RUN: FileCheck %s --check-prefix DEBUG-ERR --implicit-check-not lit < %t/stderr.txt + +# DEBUG: -- Testing: 5 tests, 1 workers -- +# DEBUG-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# DEBUG-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# DEBUG-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# DEBUG-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# DEBUG-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# DEBUG-NEXT: ******************** +# DEBUG-NEXT: Failed Tests (1): +# DEBUG-NEXT: verbosity :: fail.txt +# DEBUG-EMPTY: +# DEBUG-NEXT: ******************** +# DEBUG-NEXT: Unexpectedly Passed Tests (1): +# DEBUG-NEXT: verbosity :: xpass.txt +# DEBUG-EMPTY: +# DEBUG-EMPTY: +# DEBUG-NEXT: Testing Time: {{.*}}s +# DEBUG-EMPTY: +# DEBUG-NEXT: Total Discovered Tests: 5 +# DEBUG-NEXT: Unsupported : 1 (20.00%) +# DEBUG-NEXT: Passed : 1 (20.00%) +# DEBUG-NEXT: Expectedly Failed : 1 (20.00%) +# DEBUG-NEXT: 
Failed : 1 (20.00%) +# DEBUG-NEXT: Unexpectedly Passed: 1 (20.00%) + +# DEBUG-ERR: lit.py: {{.*}}discovery.py:{{[0-9]+}}: debug: loading suite config '{{.*}}lit.cfg' +# DEBUG-ERR-NEXT: lit.py: {{.*}}lit.cfg:{{[0-9]+}}: debug: this is a debug log +# DEBUG-ERR-NEXT: lit.py: {{.*}}lit.cfg:{{[0-9]+}}: note: this is a note +# DEBUG-ERR-NEXT: lit.py: {{.*}}lit.cfg:{{[0-9]+}}: warning: this is a warning +# DEBUG-ERR-NEXT: lit.py: {{.*}}TestingConfig.py:{{[0-9]+}}: debug: ... loaded config '{{.*}}lit.cfg' +# DEBUG-ERR-NEXT: lit.py: {{.*}}discovery.py:{{[0-9]+}}: debug: resolved input '{{.*}}verbosity' to 'verbosity'::() +# DEBUG-ERR-EMPTY: +# DEBUG-ERR-NEXT: 1 warning(s) in tests + + +# RUN: not %{lit} --diagnostic-level note %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} --diagnostic-level warning %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix WARNING-ERR --implicit-check-not lit < %t/stderr.txt + +# WARNING-ERR: lit.py: {{.*}}lit.cfg:{{[0-9]+}}: warning: this is a warning +# WARNING-ERR-EMPTY: +# WARNING-ERR-NEXT: 1 warning(s) in tests + +# RUN: not %{lit} --diagnostic-level error %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix ERROR-ERR --implicit-check-not lit < %t/stderr.txt + +# ERROR-ERR: 1 warning(s) in tests + + +### Test --test-output + +# RUN: not %{lit} --test-output off %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} --test-output failed %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s 
--check-prefix VERBOSE < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# TEST-OUTPUT-OFF: -- Testing: 5 tests, 1 workers -- +# TEST-OUTPUT-OFF-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# TEST-OUTPUT-OFF-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# TEST-OUTPUT-OFF-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# TEST-OUTPUT-OFF-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# TEST-OUTPUT-OFF-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# TEST-OUTPUT-OFF-NEXT: ******************** +# TEST-OUTPUT-OFF-NEXT: Failed Tests (1): +# TEST-OUTPUT-OFF-NEXT: verbosity :: fail.txt +# TEST-OUTPUT-OFF-EMPTY: +# TEST-OUTPUT-OFF-NEXT: ******************** +# TEST-OUTPUT-OFF-NEXT: Unexpectedly Passed Tests (1): +# TEST-OUTPUT-OFF-NEXT: verbosity :: xpass.txt +# TEST-OUTPUT-OFF-EMPTY: +# TEST-OUTPUT-OFF-EMPTY: +# TEST-OUTPUT-OFF-NEXT: Testing Time: {{.*}}s +# TEST-OUTPUT-OFF-EMPTY: +# TEST-OUTPUT-OFF-NEXT: Total Discovered Tests: 5 +# TEST-OUTPUT-OFF-NEXT: Unsupported : 1 (20.00%) +# TEST-OUTPUT-OFF-NEXT: Passed : 1 (20.00%) +# TEST-OUTPUT-OFF-NEXT: Expectedly Failed : 1 (20.00%) +# TEST-OUTPUT-OFF-NEXT: Failed : 1 (20.00%) +# TEST-OUTPUT-OFF-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --test-output all %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SHOW-ALL < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + + +### Test --print-result-after + +# RUN: not %{lit} --print-result-after off %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix RESULT-OFF < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RESULT-OFF: ******************** +# RESULT-OFF-NEXT: Failed Tests (1): +# RESULT-OFF-NEXT: verbosity :: fail.txt +# RESULT-OFF-EMPTY: +# RESULT-OFF-NEXT: ******************** +# RESULT-OFF-NEXT: 
Unexpectedly Passed Tests (1): +# RESULT-OFF-NEXT: verbosity :: xpass.txt +# RESULT-OFF-EMPTY: +# RESULT-OFF-EMPTY: +# RESULT-OFF-NEXT: Testing Time: {{.*}}s +# RESULT-OFF-EMPTY: +# RESULT-OFF-NEXT: Total Discovered Tests: 5 +# RESULT-OFF-NEXT: Unsupported : 1 (20.00%) +# RESULT-OFF-NEXT: Passed : 1 (20.00%) +# RESULT-OFF-NEXT: Expectedly Failed : 1 (20.00%) +# RESULT-OFF-NEXT: Failed : 1 (20.00%) +# RESULT-OFF-NEXT: Unexpectedly Passed: 1 (20.00%) + + +# RUN: not %{lit} --print-result-after failed %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix RESULT-FAILED < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RESULT-FAILED: -- Testing: 5 tests, 1 workers -- +# RESULT-FAILED-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# RESULT-FAILED-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# RESULT-FAILED-NEXT: ******************** +# RESULT-FAILED-NEXT: Failed Tests (1): +# RESULT-FAILED-NEXT: verbosity :: fail.txt +# RESULT-FAILED-EMPTY: +# RESULT-FAILED-NEXT: ******************** +# RESULT-FAILED-NEXT: Unexpectedly Passed Tests (1): +# RESULT-FAILED-NEXT: verbosity :: xpass.txt +# RESULT-FAILED-EMPTY: +# RESULT-FAILED-EMPTY: +# RESULT-FAILED-NEXT: Testing Time: {{.*}}s +# RESULT-FAILED-EMPTY: +# RESULT-FAILED-NEXT: Total Discovered Tests: 5 +# RESULT-FAILED-NEXT: Unsupported : 1 (20.00%) +# RESULT-FAILED-NEXT: Passed : 1 (20.00%) +# RESULT-FAILED-NEXT: Expectedly Failed : 1 (20.00%) +# RESULT-FAILED-NEXT: Failed : 1 (20.00%) +# RESULT-FAILED-NEXT: Unexpectedly Passed: 1 (20.00%) + + +# RUN: not %{lit} --print-result-after all %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + + +### Test combinations of --print-result-after followed by --test-output + +# RUN: not %{lit} --print-result-after off --test-output failed 
%{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix RESULT-OFF-OUTPUT-FAILED < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RESULT-OFF-OUTPUT-FAILED: -- Testing: 5 tests, 1 workers -- +# RESULT-OFF-OUTPUT-FAILED-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# RESULT-OFF-OUTPUT-FAILED-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# RESULT-OFF-OUTPUT-FAILED-NEXT: Exit Code: 127 +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: Command Output (stdout): +# RESULT-OFF-OUTPUT-FAILED-NEXT: -- +# RESULT-OFF-OUTPUT-FAILED-NEXT: # {{R}}UN: at line 1 +# RESULT-OFF-OUTPUT-FAILED-NEXT: echo "fail test output" +# RESULT-OFF-OUTPUT-FAILED-NEXT: # executed command: echo 'fail test output' +# RESULT-OFF-OUTPUT-FAILED-NEXT: # .---command stdout------------ +# RESULT-OFF-OUTPUT-FAILED-NEXT: # | fail test output +# RESULT-OFF-OUTPUT-FAILED-NEXT: # `----------------------------- +# RESULT-OFF-OUTPUT-FAILED-NEXT: # {{R}}UN: at line 2 +# RESULT-OFF-OUTPUT-FAILED-NEXT: fail +# RESULT-OFF-OUTPUT-FAILED-NEXT: # executed command: fail +# RESULT-OFF-OUTPUT-FAILED-NEXT: # .---command stderr------------ +# RESULT-OFF-OUTPUT-FAILED-NEXT: # | 'fail': command not found +# RESULT-OFF-OUTPUT-FAILED-NEXT: # `----------------------------- +# RESULT-OFF-OUTPUT-FAILED-NEXT: # error: command failed with exit status: 127 +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: -- +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: ******************** +# RESULT-OFF-OUTPUT-FAILED-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# RESULT-OFF-OUTPUT-FAILED-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# RESULT-OFF-OUTPUT-FAILED-NEXT: Exit Code: 0 +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: Command Output (stdout): +# RESULT-OFF-OUTPUT-FAILED-NEXT: -- +# 
RESULT-OFF-OUTPUT-FAILED-NEXT: # {{R}}UN: at line 2 +# RESULT-OFF-OUTPUT-FAILED-NEXT: echo "xpass test output" +# RESULT-OFF-OUTPUT-FAILED-NEXT: # executed command: echo 'xpass test output' +# RESULT-OFF-OUTPUT-FAILED-NEXT: # .---command stdout------------ +# RESULT-OFF-OUTPUT-FAILED-NEXT: # | xpass test output +# RESULT-OFF-OUTPUT-FAILED-NEXT: # `----------------------------- +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: -- +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: ******************** +# RESULT-OFF-OUTPUT-FAILED-NEXT: ******************** +# RESULT-OFF-OUTPUT-FAILED-NEXT: Failed Tests (1): +# RESULT-OFF-OUTPUT-FAILED-NEXT: verbosity :: fail.txt +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: ******************** +# RESULT-OFF-OUTPUT-FAILED-NEXT: Unexpectedly Passed Tests (1): +# RESULT-OFF-OUTPUT-FAILED-NEXT: verbosity :: xpass.txt +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: Testing Time: {{.*}}s +# RESULT-OFF-OUTPUT-FAILED-EMPTY: +# RESULT-OFF-OUTPUT-FAILED-NEXT: Total Discovered Tests: 5 +# RESULT-OFF-OUTPUT-FAILED-NEXT: Unsupported : 1 (20.00%) +# RESULT-OFF-OUTPUT-FAILED-NEXT: Passed : 1 (20.00%) +# RESULT-OFF-OUTPUT-FAILED-NEXT: Expectedly Failed : 1 (20.00%) +# RESULT-OFF-OUTPUT-FAILED-NEXT: Failed : 1 (20.00%) +# RESULT-OFF-OUTPUT-FAILED-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --print-result-after all --test-output off %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} --print-result-after failed --test-output all %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SHOW-ALL < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + + +### Test combinations 
of --test-output followed by --print-result-after + +# RUN: not %{lit} --test-output failed --print-result-after off %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix RESULT-OFF < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} --test-output off --print-result-after all %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} --test-output all --print-result-after failed %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix OUTPUT-ALL-RESULT-FAILED < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# OUTPUT-ALL-RESULT-FAILED: -- Testing: 5 tests, 1 workers -- +# OUTPUT-ALL-RESULT-FAILED-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# OUTPUT-ALL-RESULT-FAILED-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# OUTPUT-ALL-RESULT-FAILED-NEXT: Exit Code: 127 +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: Command Output (stdout): +# OUTPUT-ALL-RESULT-FAILED-NEXT: -- +# OUTPUT-ALL-RESULT-FAILED-NEXT: # {{R}}UN: at line 1 +# OUTPUT-ALL-RESULT-FAILED-NEXT: echo "fail test output" +# OUTPUT-ALL-RESULT-FAILED-NEXT: # executed command: echo 'fail test output' +# OUTPUT-ALL-RESULT-FAILED-NEXT: # .---command stdout------------ +# OUTPUT-ALL-RESULT-FAILED-NEXT: # | fail test output +# OUTPUT-ALL-RESULT-FAILED-NEXT: # `----------------------------- +# OUTPUT-ALL-RESULT-FAILED-NEXT: # {{R}}UN: at line 2 +# OUTPUT-ALL-RESULT-FAILED-NEXT: fail +# OUTPUT-ALL-RESULT-FAILED-NEXT: # executed command: fail +# OUTPUT-ALL-RESULT-FAILED-NEXT: # .---command stderr------------ +# OUTPUT-ALL-RESULT-FAILED-NEXT: # | 'fail': command not found +# OUTPUT-ALL-RESULT-FAILED-NEXT: 
# `----------------------------- +# OUTPUT-ALL-RESULT-FAILED-NEXT: # error: command failed with exit status: 127 +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: -- +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: ******************** +# OUTPUT-ALL-RESULT-FAILED-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# OUTPUT-ALL-RESULT-FAILED-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# OUTPUT-ALL-RESULT-FAILED-NEXT: Exit Code: 0 +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: Command Output (stdout): +# OUTPUT-ALL-RESULT-FAILED-NEXT: -- +# OUTPUT-ALL-RESULT-FAILED-NEXT: # {{R}}UN: at line 2 +# OUTPUT-ALL-RESULT-FAILED-NEXT: echo "xpass test output" +# OUTPUT-ALL-RESULT-FAILED-NEXT: # executed command: echo 'xpass test output' +# OUTPUT-ALL-RESULT-FAILED-NEXT: # .---command stdout------------ +# OUTPUT-ALL-RESULT-FAILED-NEXT: # | xpass test output +# OUTPUT-ALL-RESULT-FAILED-NEXT: # `----------------------------- +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: -- +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: ******************** +# OUTPUT-ALL-RESULT-FAILED-NEXT: ******************** +# OUTPUT-ALL-RESULT-FAILED-NEXT: Failed Tests (1): +# OUTPUT-ALL-RESULT-FAILED-NEXT: verbosity :: fail.txt +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: ******************** +# OUTPUT-ALL-RESULT-FAILED-NEXT: Unexpectedly Passed Tests (1): +# OUTPUT-ALL-RESULT-FAILED-NEXT: verbosity :: xpass.txt +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: Testing Time: {{.*}} +# OUTPUT-ALL-RESULT-FAILED-EMPTY: +# OUTPUT-ALL-RESULT-FAILED-NEXT: Total Discovered Tests: 5 +# OUTPUT-ALL-RESULT-FAILED-NEXT: Unsupported : 1 (20.00%) +# OUTPUT-ALL-RESULT-FAILED-NEXT: Passed : 1 (20.00%) +# OUTPUT-ALL-RESULT-FAILED-NEXT: Expectedly Failed : 1 (20.00%) +# OUTPUT-ALL-RESULT-FAILED-NEXT: Failed : 
1 (20.00%) +# OUTPUT-ALL-RESULT-FAILED-NEXT: Unexpectedly Passed: 1 (20.00%) + + +### Test progress bar and terse summary in isolation + +# RUN: not %{lit} --progress-bar %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix PROGRESS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# PROGRESS: -- Testing: 5 tests, 1 workers -- +# PROGRESS-NEXT: Testing: +# PROGRESS-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# PROGRESS-NEXT: Testing: +# PROGRESS-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# PROGRESS-NEXT: Testing: +# PROGRESS-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# PROGRESS-NEXT: Testing: +# PROGRESS-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# PROGRESS-NEXT: Testing: +# PROGRESS-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# PROGRESS-NEXT: Testing: +# PROGRESS-NEXT: ******************** +# PROGRESS-NEXT: Failed Tests (1): +# PROGRESS-NEXT: verbosity :: fail.txt +# PROGRESS-EMPTY: +# PROGRESS-NEXT: ******************** +# PROGRESS-NEXT: Unexpectedly Passed Tests (1): +# PROGRESS-NEXT: verbosity :: xpass.txt +# PROGRESS-EMPTY: +# PROGRESS-EMPTY: +# PROGRESS-NEXT: Testing Time: {{.*}}s +# PROGRESS-EMPTY: +# PROGRESS-NEXT: Total Discovered Tests: 5 +# PROGRESS-NEXT: Unsupported : 1 (20.00%) +# PROGRESS-NEXT: Passed : 1 (20.00%) +# PROGRESS-NEXT: Expectedly Failed : 1 (20.00%) +# PROGRESS-NEXT: Failed : 1 (20.00%) +# PROGRESS-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --terse-summary %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix TERSE < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# TERSE: -- Testing: 5 tests, 1 workers -- +# TERSE-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# TERSE-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# TERSE-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# TERSE-NEXT: {{X}}FAIL: 
verbosity :: xfail.txt (4 of 5) +# TERSE-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# TERSE-NEXT: ******************** +# TERSE-NEXT: Failed Tests (1): +# TERSE-NEXT: verbosity :: fail.txt +# TERSE-EMPTY: +# TERSE-NEXT: ******************** +# TERSE-NEXT: Unexpectedly Passed Tests (1): +# TERSE-NEXT: verbosity :: xpass.txt +# TERSE-EMPTY: +# TERSE-EMPTY: +# TERSE-NEXT: Total Discovered Tests: 5 +# TERSE-NEXT: Failed : 1 (20.00%) +# TERSE-NEXT: Unexpectedly Passed: 1 (20.00%) + + +### Aliases in combination + +# RUN: not %{lit} -a -s %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix AS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# AS: -- Testing: 5 tests, 1 workers -- +# AS-NEXT: Testing: +# AS-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# AS-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# AS-NEXT: Exit Code: 127 +# AS-EMPTY: +# AS-NEXT: Command Output (stdout): +# AS-NEXT: -- +# AS-NEXT: # {{R}}UN: at line 1 +# AS-NEXT: echo "fail test output" +# AS-NEXT: # executed command: echo 'fail test output' +# AS-NEXT: # .---command stdout------------ +# AS-NEXT: # | fail test output +# AS-NEXT: # `----------------------------- +# AS-NEXT: # {{R}}UN: at line 2 +# AS-NEXT: fail +# AS-NEXT: # executed command: fail +# AS-NEXT: # .---command stderr------------ +# AS-NEXT: # | 'fail': command not found +# AS-NEXT: # `----------------------------- +# AS-NEXT: # error: command failed with exit status: 127 +# AS-EMPTY: +# AS-NEXT: -- +# AS-EMPTY: +# AS-NEXT: ******************** +# AS-NEXT: Testing: +# AS-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# AS-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# AS-NEXT: Exit Code: 0 +# AS-EMPTY: +# AS-NEXT: Command Output (stdout): +# AS-NEXT: -- +# AS-NEXT: # {{R}}UN: at line 2 +# AS-NEXT: echo "xpass test output" +# AS-NEXT: # executed command: echo 'xpass 
test output' +# AS-NEXT: # .---command stdout------------ +# AS-NEXT: # | xpass test output +# AS-NEXT: # `----------------------------- +# AS-EMPTY: +# AS-NEXT: -- +# AS-EMPTY: +# AS-NEXT: ******************** +# AS-NEXT: Testing: +# AS-NEXT: ******************** +# AS-NEXT: Failed Tests (1): +# AS-NEXT: verbosity :: fail.txt +# AS-EMPTY: +# AS-NEXT: ******************** +# AS-NEXT: Unexpectedly Passed Tests (1): +# AS-NEXT: verbosity :: xpass.txt +# AS-EMPTY: +# AS-EMPTY: +# AS-NEXT: Testing Time: {{.*}}s +# AS-EMPTY: +# AS-NEXT: Total Discovered Tests: 5 +# AS-NEXT: Unsupported : 1 (20.00%) +# AS-NEXT: Passed : 1 (20.00%) +# AS-NEXT: Expectedly Failed : 1 (20.00%) +# AS-NEXT: Failed : 1 (20.00%) +# AS-NEXT: Unexpectedly Passed: 1 (20.00%) + + +# RUN: not %{lit} -s -a %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SA < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# SA: -- Testing: 5 tests, 1 workers -- +# SA-NEXT: Testing: +# SA-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# SA-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# SA-NEXT: Exit Code: 127 +# SA-EMPTY: +# SA-NEXT: Command Output (stdout): +# SA-NEXT: -- +# SA-NEXT: # {{R}}UN: at line 1 +# SA-NEXT: echo "fail test output" +# SA-NEXT: # executed command: echo 'fail test output' +# SA-NEXT: # .---command stdout------------ +# SA-NEXT: # | fail test output +# SA-NEXT: # `----------------------------- +# SA-NEXT: # {{R}}UN: at line 2 +# SA-NEXT: fail +# SA-NEXT: # executed command: fail +# SA-NEXT: # .---command stderr------------ +# SA-NEXT: # | 'fail': command not found +# SA-NEXT: # `----------------------------- +# SA-NEXT: # error: command failed with exit status: 127 +# SA-EMPTY: +# SA-NEXT: -- +# SA-EMPTY: +# SA-NEXT: ******************** +# SA-NEXT: Testing: +# SA-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# SA-NEXT: Exit Code: 0 +# SA-EMPTY: +# SA-NEXT: Command 
Output (stdout): +# SA-NEXT: -- +# SA-NEXT: # {{R}}UN: at line 1 +# SA-NEXT: echo "pass test output" +# SA-NEXT: # executed command: echo 'pass test output' +# SA-NEXT: # .---command stdout------------ +# SA-NEXT: # | pass test output +# SA-NEXT: # `----------------------------- +# SA-EMPTY: +# SA-NEXT: -- +# SA-EMPTY: +# SA-NEXT: ******************** +# SA-NEXT: Testing: +# SA-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# SA-NEXT: Test requires the following unavailable features: asdf +# SA-NEXT: ******************** +# SA-NEXT: Testing: +# SA-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# SA-NEXT: Exit Code: 1 +# SA-EMPTY: +# SA-NEXT: Command Output (stdout): +# SA-NEXT: -- +# SA-NEXT: # {{R}}UN: at line 2 +# SA-NEXT: not echo "xfail test output" +# SA-NEXT: # executed command: not echo 'xfail test output' +# SA-NEXT: # .---command stdout------------ +# SA-NEXT: # | xfail test output +# SA-NEXT: # `----------------------------- +# SA-NEXT: # error: command failed with exit status: 1 +# SA-EMPTY: +# SA-NEXT: -- +# SA-EMPTY: +# SA-NEXT: ******************** +# SA-NEXT: Testing: +# SA-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# SA-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# SA-NEXT: Exit Code: 0 +# SA-EMPTY: +# SA-NEXT: Command Output (stdout): +# SA-NEXT: -- +# SA-NEXT: # {{R}}UN: at line 2 +# SA-NEXT: echo "xpass test output" +# SA-NEXT: # executed command: echo 'xpass test output' +# SA-NEXT: # .---command stdout------------ +# SA-NEXT: # | xpass test output +# SA-NEXT: # `----------------------------- +# SA-EMPTY: +# SA-NEXT: -- +# SA-EMPTY: +# SA-NEXT: ******************** +# SA-NEXT: Testing: +# SA-NEXT: ******************** +# SA-NEXT: Failed Tests (1): +# SA-NEXT: verbosity :: fail.txt +# SA-EMPTY: +# SA-NEXT: ******************** +# SA-NEXT: Unexpectedly Passed Tests (1): +# SA-NEXT: verbosity :: xpass.txt +# SA-EMPTY: +# SA-EMPTY: +# SA-NEXT: Testing Time: {{.*}}s +# SA-EMPTY: +# 
SA-NEXT: Total Discovered Tests: 5 +# SA-NEXT: Unsupported : 1 (20.00%) +# SA-NEXT: Passed : 1 (20.00%) +# SA-NEXT: Expectedly Failed : 1 (20.00%) +# SA-NEXT: Failed : 1 (20.00%) +# SA-NEXT: Unexpectedly Passed: 1 (20.00%) + + +# RUN: not %{lit} -q -a %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix QA < %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-ERR --implicit-check-not lit < %t/stderr.txt + +# QA: -- Testing: 5 tests, 1 workers -- +# QA-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# QA-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# QA-NEXT: Exit Code: 127 +# QA-EMPTY: +# QA-NEXT: Command Output (stdout): +# QA-NEXT: -- +# QA-NEXT: # {{R}}UN: at line 1 +# QA-NEXT: echo "fail test output" +# QA-NEXT: # executed command: echo 'fail test output' +# QA-NEXT: # .---command stdout------------ +# QA-NEXT: # | fail test output +# QA-NEXT: # `----------------------------- +# QA-NEXT: # {{R}}UN: at line 2 +# QA-NEXT: fail +# QA-NEXT: # executed command: fail +# QA-NEXT: # .---command stderr------------ +# QA-NEXT: # | 'fail': command not found +# QA-NEXT: # `----------------------------- +# QA-NEXT: # error: command failed with exit status: 127 +# QA-EMPTY: +# QA-NEXT: -- +# QA-EMPTY: +# QA-NEXT: ******************** +# QA-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# QA-NEXT: Exit Code: 0 +# QA-EMPTY: +# QA-NEXT: Command Output (stdout): +# QA-NEXT: -- +# QA-NEXT: # {{R}}UN: at line 1 +# QA-NEXT: echo "pass test output" +# QA-NEXT: # executed command: echo 'pass test output' +# QA-NEXT: # .---command stdout------------ +# QA-NEXT: # | pass test output +# QA-NEXT: # `----------------------------- +# QA-EMPTY: +# QA-NEXT: -- +# QA-EMPTY: +# QA-NEXT: ******************** +# QA-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# QA-NEXT: Test requires the following unavailable features: asdf +# QA-NEXT: ******************** +# QA-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 
5) +# QA-NEXT: Exit Code: 1 +# QA-EMPTY: +# QA-NEXT: Command Output (stdout): +# QA-NEXT: -- +# QA-NEXT: # {{R}}UN: at line 2 +# QA-NEXT: not echo "xfail test output" +# QA-NEXT: # executed command: not echo 'xfail test output' +# QA-NEXT: # .---command stdout------------ +# QA-NEXT: # | xfail test output +# QA-NEXT: # `----------------------------- +# QA-NEXT: # error: command failed with exit status: 1 +# QA-EMPTY: +# QA-NEXT: -- +# QA-EMPTY: +# QA-NEXT: ******************** +# QA-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# QA-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# QA-NEXT: Exit Code: 0 +# QA-EMPTY: +# QA-NEXT: Command Output (stdout): +# QA-NEXT: -- +# QA-NEXT: # {{R}}UN: at line 2 +# QA-NEXT: echo "xpass test output" +# QA-NEXT: # executed command: echo 'xpass test output' +# QA-NEXT: # .---command stdout------------ +# QA-NEXT: # | xpass test output +# QA-NEXT: # `----------------------------- +# QA-EMPTY: +# QA-NEXT: -- +# QA-EMPTY: +# QA-NEXT: ******************** +# QA-NEXT: ******************** +# QA-NEXT: Failed Tests (1): +# QA-NEXT: verbosity :: fail.txt +# QA-EMPTY: +# QA-NEXT: ******************** +# QA-NEXT: Unexpectedly Passed Tests (1): +# QA-NEXT: verbosity :: xpass.txt +# QA-EMPTY: +# QA-EMPTY: +# QA-NEXT: Total Discovered Tests: 5 +# QA-NEXT: Failed : 1 (20.00%) +# QA-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} -a -q %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET < %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} -sqav %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SQAV < %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-ERR --implicit-check-not lit < %t/stderr.txt + +# SQAV: -- Testing: 5 tests, 1 workers -- +# SQAV-NEXT: Testing: +# SQAV-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# SQAV-NEXT: 
******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# SQAV-NEXT: Exit Code: 127 +# SQAV-EMPTY: +# SQAV-NEXT: Command Output (stdout): +# SQAV-NEXT: -- +# SQAV-NEXT: # {{R}}UN: at line 1 +# SQAV-NEXT: echo "fail test output" +# SQAV-NEXT: # executed command: echo 'fail test output' +# SQAV-NEXT: # .---command stdout------------ +# SQAV-NEXT: # | fail test output +# SQAV-NEXT: # `----------------------------- +# SQAV-NEXT: # {{R}}UN: at line 2 +# SQAV-NEXT: fail +# SQAV-NEXT: # executed command: fail +# SQAV-NEXT: # .---command stderr------------ +# SQAV-NEXT: # | 'fail': command not found +# SQAV-NEXT: # `----------------------------- +# SQAV-NEXT: # error: command failed with exit status: 127 +# SQAV-EMPTY: +# SQAV-NEXT: -- +# SQAV-EMPTY: +# SQAV-NEXT: ******************** +# SQAV-NEXT: Testing: +# SQAV-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# SQAV-NEXT: Testing: +# SQAV-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# SQAV-NEXT: Testing: +# SQAV-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# SQAV-NEXT: Testing: +# SQAV-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# SQAV-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# SQAV-NEXT: Exit Code: 0 +# SQAV-EMPTY: +# SQAV-NEXT: Command Output (stdout): +# SQAV-NEXT: -- +# SQAV-NEXT: # {{R}}UN: at line 2 +# SQAV-NEXT: echo "xpass test output" +# SQAV-NEXT: # executed command: echo 'xpass test output' +# SQAV-NEXT: # .---command stdout------------ +# SQAV-NEXT: # | xpass test output +# SQAV-NEXT: # `----------------------------- +# SQAV-EMPTY: +# SQAV-NEXT: -- +# SQAV-EMPTY: +# SQAV-NEXT: ******************** +# SQAV-NEXT: Testing: +# SQAV-NEXT: ******************** +# SQAV-NEXT: Failed Tests (1): +# SQAV-NEXT: verbosity :: fail.txt +# SQAV-EMPTY: +# SQAV-NEXT: ******************** +# SQAV-NEXT: Unexpectedly Passed Tests (1): +# SQAV-NEXT: verbosity :: xpass.txt +# SQAV-EMPTY: +# SQAV-EMPTY: +# SQAV-NEXT: Total Discovered Tests: 5 +# 
SQAV-NEXT: Failed : 1 (20.00%) +# SQAV-NEXT: Unexpectedly Passed: 1 (20.00%) + + +### Aliases with specific overrides + +# RUN: not %{lit} --quiet --no-terse-summary %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-W-SUMMARY < %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-ERR --implicit-check-not lit < %t/stderr.txt + +# QUIET-W-SUMMARY: -- Testing: 5 tests, 1 workers -- +# QUIET-W-SUMMARY-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# QUIET-W-SUMMARY-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# QUIET-W-SUMMARY-NEXT: ******************** +# QUIET-W-SUMMARY-NEXT: Failed Tests (1): +# QUIET-W-SUMMARY-NEXT: verbosity :: fail.txt +# QUIET-W-SUMMARY-EMPTY: +# QUIET-W-SUMMARY-NEXT: ******************** +# QUIET-W-SUMMARY-NEXT: Unexpectedly Passed Tests (1): +# QUIET-W-SUMMARY-NEXT: verbosity :: xpass.txt +# QUIET-W-SUMMARY-EMPTY: +# QUIET-W-SUMMARY-EMPTY: +# QUIET-W-SUMMARY-NEXT: Testing Time: {{.*}}s +# QUIET-W-SUMMARY-EMPTY: +# QUIET-W-SUMMARY-NEXT: Total Discovered Tests: 5 +# QUIET-W-SUMMARY-NEXT: Unsupported : 1 (20.00%) +# QUIET-W-SUMMARY-NEXT: Passed : 1 (20.00%) +# QUIET-W-SUMMARY-NEXT: Expectedly Failed : 1 (20.00%) +# QUIET-W-SUMMARY-NEXT: Failed : 1 (20.00%) +# QUIET-W-SUMMARY-NEXT: Unexpectedly Passed: 1 (20.00%) + + +# RUN: not %{lit} --quiet --progress-bar %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-W-PROGRESS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-ERR --implicit-check-not lit < %t/stderr.txt + +# QUIET-W-PROGRESS: -- Testing: 5 tests, 1 workers -- +# QUIET-W-PROGRESS-NEXT: Testing: +# QUIET-W-PROGRESS-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# QUIET-W-PROGRESS-NEXT: Testing: +# QUIET-W-PROGRESS-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# QUIET-W-PROGRESS-NEXT: Testing: +# QUIET-W-PROGRESS-NEXT: ******************** +# QUIET-W-PROGRESS-NEXT: Failed Tests (1): +# QUIET-W-PROGRESS-NEXT: verbosity :: fail.txt +# 
QUIET-W-PROGRESS-EMPTY: +# QUIET-W-PROGRESS-NEXT: ******************** +# QUIET-W-PROGRESS-NEXT: Unexpectedly Passed Tests (1): +# QUIET-W-PROGRESS-NEXT: verbosity :: xpass.txt +# QUIET-W-PROGRESS-EMPTY: +# QUIET-W-PROGRESS-EMPTY: +# QUIET-W-PROGRESS-NEXT: Total Discovered Tests: 5 +# QUIET-W-PROGRESS-NEXT: Failed : 1 (20.00%) +# QUIET-W-PROGRESS-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --show-all --terse-summary %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix ALL-TERSE < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# ALL-TERSE: -- Testing: 5 tests, 1 workers -- +# ALL-TERSE-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# ALL-TERSE-NEXT: ******************** TEST 'verbosity :: fail.txt' FAILED ******************** +# ALL-TERSE-NEXT: Exit Code: 127 +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: Command Output (stdout): +# ALL-TERSE-NEXT: -- +# ALL-TERSE-NEXT: # {{R}}UN: at line 1 +# ALL-TERSE-NEXT: echo "fail test output" +# ALL-TERSE-NEXT: # executed command: echo 'fail test output' +# ALL-TERSE-NEXT: # .---command stdout------------ +# ALL-TERSE-NEXT: # | fail test output +# ALL-TERSE-NEXT: # `----------------------------- +# ALL-TERSE-NEXT: # {{R}}UN: at line 2 +# ALL-TERSE-NEXT: fail +# ALL-TERSE-NEXT: # executed command: fail +# ALL-TERSE-NEXT: # .---command stderr------------ +# ALL-TERSE-NEXT: # | 'fail': command not found +# ALL-TERSE-NEXT: # `----------------------------- +# ALL-TERSE-NEXT: # error: command failed with exit status: 127 +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: -- +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: ******************** +# ALL-TERSE-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# ALL-TERSE-NEXT: Exit Code: 0 +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: Command Output (stdout): +# ALL-TERSE-NEXT: -- +# ALL-TERSE-NEXT: # {{R}}UN: at line 1 +# ALL-TERSE-NEXT: echo "pass test output" +# ALL-TERSE-NEXT: # executed command: echo 'pass test output' 
+# ALL-TERSE-NEXT: # .---command stdout------------ +# ALL-TERSE-NEXT: # | pass test output +# ALL-TERSE-NEXT: # `----------------------------- +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: -- +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: ******************** +# ALL-TERSE-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# ALL-TERSE-NEXT: Test requires the following unavailable features: asdf +# ALL-TERSE-NEXT: ******************** +# ALL-TERSE-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# ALL-TERSE-NEXT: Exit Code: 1 +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: Command Output (stdout): +# ALL-TERSE-NEXT: -- +# ALL-TERSE-NEXT: # {{R}}UN: at line 2 +# ALL-TERSE-NEXT: not echo "xfail test output" +# ALL-TERSE-NEXT: # executed command: not echo 'xfail test output' +# ALL-TERSE-NEXT: # .---command stdout------------ +# ALL-TERSE-NEXT: # | xfail test output +# ALL-TERSE-NEXT: # `----------------------------- +# ALL-TERSE-NEXT: # error: command failed with exit status: 1 +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: -- +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: ******************** +# ALL-TERSE-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# ALL-TERSE-NEXT: ******************** TEST 'verbosity :: xpass.txt' FAILED ******************** +# ALL-TERSE-NEXT: Exit Code: 0 +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: Command Output (stdout): +# ALL-TERSE-NEXT: -- +# ALL-TERSE-NEXT: # {{R}}UN: at line 2 +# ALL-TERSE-NEXT: echo "xpass test output" +# ALL-TERSE-NEXT: # executed command: echo 'xpass test output' +# ALL-TERSE-NEXT: # .---command stdout------------ +# ALL-TERSE-NEXT: # | xpass test output +# ALL-TERSE-NEXT: # `----------------------------- +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: -- +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: ******************** +# ALL-TERSE-NEXT: ******************** +# ALL-TERSE-NEXT: Failed Tests (1): +# ALL-TERSE-NEXT: verbosity :: fail.txt +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: ******************** +# ALL-TERSE-NEXT: Unexpectedly Passed Tests (1): +# ALL-TERSE-NEXT: 
verbosity :: xpass.txt +# ALL-TERSE-EMPTY: +# ALL-TERSE-EMPTY: +# ALL-TERSE-NEXT: Total Discovered Tests: 5 +# ALL-TERSE-NEXT: Failed : 1 (20.00%) +# ALL-TERSE-NEXT: Unexpectedly Passed: 1 (20.00%) + +# RUN: not %{lit} --show-all --diagnostic-level error %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SHOW-ALL < %t/stdout.txt +# RUN: FileCheck %s --check-prefix QUIET-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} --show-all --test-output off %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# RUN: not %{lit} --succinct --print-result-after all %{inputs}/verbosity 2> %t/stderr.txt > %t/stdout.txt +# RUN: FileCheck %s --check-prefix SUCCINCT-RESULT-ALL < %t/stdout.txt +# RUN: FileCheck %s --check-prefix NO-ARGS-ERR --implicit-check-not lit < %t/stderr.txt + +# SUCCINCT-RESULT-ALL: -- Testing: 5 tests, 1 workers -- +# SUCCINCT-RESULT-ALL-NEXT: Testing: +# SUCCINCT-RESULT-ALL-NEXT: FAIL: verbosity :: fail.txt (1 of 5) +# SUCCINCT-RESULT-ALL-NEXT: Testing: +# SUCCINCT-RESULT-ALL-NEXT: PASS: verbosity :: pass.txt (2 of 5) +# SUCCINCT-RESULT-ALL-NEXT: Testing: +# SUCCINCT-RESULT-ALL-NEXT: {{UN}}SUPPORTED: verbosity :: unsupported.txt (3 of 5) +# SUCCINCT-RESULT-ALL-NEXT: Testing: +# SUCCINCT-RESULT-ALL-NEXT: {{X}}FAIL: verbosity :: xfail.txt (4 of 5) +# SUCCINCT-RESULT-ALL-NEXT: Testing: +# SUCCINCT-RESULT-ALL-NEXT: XPASS: verbosity :: xpass.txt (5 of 5) +# SUCCINCT-RESULT-ALL-NEXT: Testing: +# SUCCINCT-RESULT-ALL-NEXT: ******************** +# SUCCINCT-RESULT-ALL-NEXT: Failed Tests (1): +# SUCCINCT-RESULT-ALL-NEXT: verbosity :: fail.txt +# SUCCINCT-RESULT-ALL-EMPTY: +# SUCCINCT-RESULT-ALL-NEXT: ******************** +# SUCCINCT-RESULT-ALL-NEXT: Unexpectedly Passed Tests (1): +# SUCCINCT-RESULT-ALL-NEXT: verbosity :: xpass.txt +# SUCCINCT-RESULT-ALL-EMPTY: +# 
SUCCINCT-RESULT-ALL-EMPTY: +# SUCCINCT-RESULT-ALL-NEXT: Testing Time: {{.*}}s +# SUCCINCT-RESULT-ALL-EMPTY: +# SUCCINCT-RESULT-ALL-NEXT: Total Discovered Tests: 5 +# SUCCINCT-RESULT-ALL-NEXT: Unsupported : 1 (20.00%) +# SUCCINCT-RESULT-ALL-NEXT: Passed : 1 (20.00%) +# SUCCINCT-RESULT-ALL-NEXT: Expectedly Failed : 1 (20.00%) +# SUCCINCT-RESULT-ALL-NEXT: Failed : 1 (20.00%) +# SUCCINCT-RESULT-ALL-NEXT: Unexpectedly Passed: 1 (20.00%) diff --git a/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h b/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h new file mode 100644 index 0000000000000..64a42a228199e --- /dev/null +++ b/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h @@ -0,0 +1,21 @@ +//===- ArithToAPFloat.h - Arith to APFloat impl conversion ---*- C++ ----*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_ARITHTOAPFLOAT_ARITHTOAPFLOAT_H +#define MLIR_CONVERSION_ARITHTOAPFLOAT_ARITHTOAPFLOAT_H + +#include + +namespace mlir { +class Pass; + +#define GEN_PASS_DECL_ARITHTOAPFLOATCONVERSIONPASS +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +#endif // MLIR_CONVERSION_ARITHTOAPFLOAT_ARITHTOAPFLOAT_H diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index 40d866ec7bf10..82bdfd02661a6 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -12,6 +12,7 @@ #include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h" #include "mlir/Conversion/AffineToStandard/AffineToStandard.h" #include "mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h" +#include "mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h" #include "mlir/Conversion/ArithToArmSME/ArithToArmSME.h" #include
"mlir/Conversion/ArithToEmitC/ArithToEmitCPass.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index e0cac8b699c30..d5665b439b059 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -186,6 +186,21 @@ def ArithToLLVMConversionPass : Pass<"convert-arith-to-llvm"> { ]; } +//===----------------------------------------------------------------------===// +// ArithToAPFloat +//===----------------------------------------------------------------------===// + +def ArithToAPFloatConversionPass + : Pass<"convert-arith-to-apfloat", "ModuleOp"> { + let summary = "Convert Arith ops to APFloat runtime library calls"; + let description = [{ + This pass converts supported Arith ops to APFloat-based runtime library + calls (APFloatWrappers.cpp). APFloat is a software implementation of + floating-point arithmetic operations. + }]; + let dependentDialects = ["func::FuncDialect"]; +} + //===----------------------------------------------------------------------===// // ArithToSPIRV //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Func/Utils/Utils.h b/mlir/include/mlir/Dialect/Func/Utils/Utils.h index 3576126a487ac..00d50874a2e8d 100644 --- a/mlir/include/mlir/Dialect/Func/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Func/Utils/Utils.h @@ -60,6 +60,13 @@ mlir::FailureOr> deduplicateArgsOfFuncOp(mlir::RewriterBase &rewriter, mlir::func::FuncOp funcOp, mlir::ModuleOp moduleOp); +/// Look up a FuncOp with signature `resultTypes`(`paramTypes`)` and name +/// `name`. Return a failure if the FuncOp is found but with a different +/// signature. 
+FailureOr lookupFnDecl(SymbolOpInterface symTable, StringRef name, + FunctionType funcT, + SymbolTableCollection *symbolTables = nullptr); + } // namespace func } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h index 8ad9ed18acebd..b09d32022e348 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h +++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h @@ -52,6 +52,10 @@ lookupOrCreatePrintF32Fn(OpBuilder &b, Operation *moduleOp, FailureOr lookupOrCreatePrintF64Fn(OpBuilder &b, Operation *moduleOp, SymbolTableCollection *symbolTables = nullptr); +FailureOr +lookupOrCreateApFloatPrintFn(OpBuilder &b, Operation *moduleOp, + SymbolTableCollection *symbolTables = nullptr); + /// Declares a function to print a C-string. /// If a custom runtime function is defined via `runtimeFunctionName`, it must /// have the signature void(char const*). The default function is `printString`. diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index d11d196207b51..4c13c5ddb2886 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -921,6 +921,23 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> { }]; } +// Attrs describing the reduction operations for the barrier operation. 
+def BarrierReductionPopc : I32EnumAttrCase<"POPC", 0, "popc">; +def BarrierReductionAnd : I32EnumAttrCase<"AND", 1, "and">; +def BarrierReductionOr : I32EnumAttrCase<"OR", 2, "or">; + +def BarrierReduction + : I32EnumAttr<"BarrierReduction", "NVVM barrier reduction operation", + [BarrierReductionPopc, BarrierReductionAnd, + BarrierReductionOr]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def BarrierReductionAttr + : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { let summary = "CTA Barrier Synchronization Op"; let description = [{ @@ -935,6 +952,9 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { - `numberOfThreads`: Specifies the number of threads participating in the barrier. When specified, the value must be a multiple of the warp size. If not specified, all threads in the CTA participate in the barrier. + - `reductionOp`: specifies the reduction operation (`popc`, `and`, `or`). + - `reductionPredicate`: specifies the predicate to be used with the + `reductionOp`. The barrier operation guarantees that when the barrier completes, prior memory accesses requested by participating threads are performed relative to all threads @@ -951,31 +971,37 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar) }]; - let arguments = (ins - Optional:$barrierId, - Optional:$numberOfThreads); + let extraClassDeclaration = [{ + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase& builder); + }]; + + let arguments = (ins Optional:$barrierId, Optional:$numberOfThreads, + OptionalAttr:$reductionOp, + Optional:$reductionPredicate); string llvmBuilder = [{ - llvm::Value *id = $barrierId ? 
$barrierId : builder.getInt32(0); - if ($numberOfThreads) - createIntrinsicCall( - builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_count, - {id, $numberOfThreads}); - else - createIntrinsicCall( - builder, llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all, {id}); + auto [id, args] = NVVM::BarrierOp::getIntrinsicIDAndArgs( + *op, moduleTranslation, builder); + if ($reductionOp) + $res = createIntrinsicCall(builder, id, args); + else + createIntrinsicCall(builder, id, args); }]; + let results = (outs Optional:$res); + let hasVerifier = 1; - let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? attr-dict"; + let assemblyFormat = + "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? " + "($reductionOp^ $reductionPredicate)? (`->` type($res)^)? attr-dict"; - let builders = [ - OpBuilder<(ins), [{ - return build($_builder, $_state, Value{}, Value{}); + let builders = [OpBuilder<(ins), [{ + return build($_builder, $_state, TypeRange{}, Value{}, Value{}, {}, Value{}); }]>, - OpBuilder<(ins "Value":$barrierId), [{ - return build($_builder, $_state, barrierId, Value{}); - }]> - ]; + OpBuilder<(ins "Value":$barrierId), [{ + return build($_builder, $_state, TypeRange{}, barrierId, Value{}, {}, Value{}); + }]>]; } def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive"> diff --git a/mlir/include/mlir/Reducer/ReductionPatternInterface.h b/mlir/include/mlir/Reducer/ReductionPatternInterface.h index a85562fda4d93..a33877dc0bd77 100644 --- a/mlir/include/mlir/Reducer/ReductionPatternInterface.h +++ b/mlir/include/mlir/Reducer/ReductionPatternInterface.h @@ -10,6 +10,7 @@ #define MLIR_REDUCER_REDUCTIONPATTERNINTERFACE_H #include "mlir/IR/DialectInterface.h" +#include "mlir/Reducer/Tester.h" namespace mlir { @@ -47,10 +48,17 @@ class DialectReductionPatternInterface /// replacing an operation with a constant. 
virtual void populateReductionPatterns(RewritePatternSet &patterns) const = 0; + /// This method extends `populateReductionPatterns` by allowing reduction + /// patterns to use a `Tester` instance. Some reduction patterns may need to + /// run the tester to determine whether certain transformations preserve the + /// "interesting" behavior of the program. This is mostly useful when a pattern + /// should choose between multiple modifications. + virtual void populateReductionPatternsWithTester(RewritePatternSet &patterns, + Tester &tester) const {} + protected: DialectReductionPatternInterface(Dialect *dialect) : Base(dialect) {} }; - } // namespace mlir #endif // MLIR_REDUCER_REDUCTIONPATTERNINTERFACE_H diff --git a/mlir/include/mlir/Reducer/Tester.h b/mlir/include/mlir/Reducer/Tester.h index eb44afc7c1c15..bed4408342034 100644 --- a/mlir/include/mlir/Reducer/Tester.h +++ b/mlir/include/mlir/Reducer/Tester.h @@ -36,6 +36,9 @@ class Tester { Untested, }; + Tester() = default; + Tester(const Tester &) = default; + Tester(StringRef testScript, ArrayRef testScriptArgs); /// Runs the interestingness testing script on a MLIR test case file. Returns @@ -46,6 +49,9 @@ class Tester { /// Return whether the file in the given path is interesting. Interestingness isInteresting(StringRef testCase) const; + void setTestScript(StringRef script) { testScript = script; } + void setTestScriptArgs(ArrayRef args) { testScriptArgs = args; } + private: StringRef testScript; ArrayRef testScriptArgs; diff --git a/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp b/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp new file mode 100644 index 0000000000000..699edb188a70a --- /dev/null +++ b/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp @@ -0,0 +1,163 @@ +//===- ArithToAPFloat.cpp - Arithmetic to APFloat Conversion --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h" + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Transforms/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Func/Utils/Utils.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Verifier.h" +#include "mlir/Transforms/WalkPatternRewriteDriver.h" + +namespace mlir { +#define GEN_PASS_DEF_ARITHTOAPFLOATCONVERSIONPASS +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +using namespace mlir; +using namespace mlir::func; + +static FuncOp createFnDecl(OpBuilder &b, SymbolOpInterface symTable, + StringRef name, FunctionType funcT, bool setPrivate, + SymbolTableCollection *symbolTables = nullptr) { + OpBuilder::InsertionGuard g(b); + assert(!symTable->getRegion(0).empty() && "expected non-empty region"); + b.setInsertionPointToStart(&symTable->getRegion(0).front()); + FuncOp funcOp = FuncOp::create(b, symTable->getLoc(), name, funcT); + if (setPrivate) + funcOp.setPrivate(); + if (symbolTables) { + SymbolTable &symbolTable = symbolTables->getSymbolTable(symTable); + symbolTable.insert(funcOp, symTable->getRegion(0).front().begin()); + } + return funcOp; +} + +/// Helper function to look up or create the symbol for a runtime library +/// function for a binary arithmetic operation. +/// +/// Parameter 1: APFloat semantics +/// Parameter 2: Left-hand side operand +/// Parameter 3: Right-hand side operand +/// +/// This function will return a failure if the function is found but has an +/// unexpected signature. 
+/// +static FailureOr +lookupOrCreateBinaryFn(OpBuilder &b, SymbolOpInterface symTable, StringRef name, + SymbolTableCollection *symbolTables = nullptr) { + auto i32Type = IntegerType::get(symTable->getContext(), 32); + auto i64Type = IntegerType::get(symTable->getContext(), 64); + + std::string funcName = (llvm::Twine("_mlir_apfloat_") + name).str(); + FunctionType funcT = + FunctionType::get(b.getContext(), {i32Type, i64Type, i64Type}, {i64Type}); + FailureOr func = + lookupFnDecl(symTable, funcName, funcT, symbolTables); + // Failed due to type mismatch. + if (failed(func)) + return func; + // Successfully matched existing decl. + if (*func) + return *func; + + return createFnDecl(b, symTable, funcName, funcT, + /*setPrivate=*/true, symbolTables); +} + +/// Rewrite a binary arithmetic operation to an APFloat function call. +template +struct BinaryArithOpToAPFloatConversion final : OpRewritePattern { + BinaryArithOpToAPFloatConversion(MLIRContext *context, + const char *APFloatName, + SymbolOpInterface symTable, + PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), symTable(symTable), + APFloatName(APFloatName) {}; + + LogicalResult matchAndRewrite(OpTy op, + PatternRewriter &rewriter) const override { + // Get APFloat function from runtime library. + FailureOr fn = + lookupOrCreateBinaryFn(rewriter, symTable, APFloatName); + if (failed(fn)) + return fn; + + rewriter.setInsertionPoint(op); + // Cast operands to 64-bit integers. + Location loc = op.getLoc(); + auto floatTy = cast(op.getType()); + auto intWType = rewriter.getIntegerType(floatTy.getWidth()); + auto int64Type = rewriter.getI64Type(); + Value lhsBits = arith::ExtUIOp::create( + rewriter, loc, int64Type, + arith::BitcastOp::create(rewriter, loc, intWType, op.getLhs())); + Value rhsBits = arith::ExtUIOp::create( + rewriter, loc, int64Type, + arith::BitcastOp::create(rewriter, loc, intWType, op.getRhs())); + + // Call APFloat function. 
+ int32_t sem = + llvm::APFloatBase::SemanticsToEnum(floatTy.getFloatSemantics()); + Value semValue = arith::ConstantOp::create( + rewriter, loc, rewriter.getI32Type(), + rewriter.getIntegerAttr(rewriter.getI32Type(), sem)); + SmallVector params = {semValue, lhsBits, rhsBits}; + auto resultOp = + func::CallOp::create(rewriter, loc, TypeRange(rewriter.getI64Type()), + SymbolRefAttr::get(*fn), params); + + // Truncate result to the original width. + Value truncatedBits = arith::TruncIOp::create(rewriter, loc, intWType, + resultOp->getResult(0)); + rewriter.replaceOp( + op, arith::BitcastOp::create(rewriter, loc, floatTy, truncatedBits)); + return success(); + } + + SymbolOpInterface symTable; + const char *APFloatName; +}; + +namespace { +struct ArithToAPFloatConversionPass final + : impl::ArithToAPFloatConversionPassBase { + using Base::Base; + + void runOnOperation() override; +}; + +void ArithToAPFloatConversionPass::runOnOperation() { + MLIRContext *context = &getContext(); + RewritePatternSet patterns(context); + patterns.add>(context, "add", + getOperation()); + patterns.add>( + context, "subtract", getOperation()); + patterns.add>( + context, "multiply", getOperation()); + patterns.add>( + context, "divide", getOperation()); + patterns.add>( + context, "remainder", getOperation()); + LogicalResult result = success(); + ScopedDiagnosticHandler scopedHandler(context, [&result](Diagnostic &diag) { + if (diag.getSeverity() == DiagnosticSeverity::Error) { + result = failure(); + } + // NB: if you don't return failure, no other diag handlers will fire (see + // mlir/lib/IR/Diagnostics.cpp:DiagnosticEngineImpl::emit). 
+ return failure(); + }); + walkAndApplyPatterns(getOperation(), std::move(patterns)); + if (failed(result)) + return signalPassFailure(); +} +} // namespace diff --git a/mlir/lib/Conversion/ArithToAPFloat/CMakeLists.txt b/mlir/lib/Conversion/ArithToAPFloat/CMakeLists.txt new file mode 100644 index 0000000000000..b5ec49c087163 --- /dev/null +++ b/mlir/lib/Conversion/ArithToAPFloat/CMakeLists.txt @@ -0,0 +1,18 @@ +add_mlir_conversion_library(MLIRArithToAPFloat + ArithToAPFloat.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ArithToLLVM + + DEPENDS + MLIRConversionPassIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRArithDialect + MLIRArithTransforms + MLIRFuncDialect + MLIRFuncUtils + ) diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index b6099902cc337..f2bacc3399144 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -14,6 +14,7 @@ #include "mlir/Conversion/LLVMCommon/VectorPattern.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" +#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/TypeUtilities.h" diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index bebf1b8fff3f9..613dc6d242ceb 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(AffineToStandard) add_subdirectory(AMDGPUToROCDL) add_subdirectory(ArithCommon) add_subdirectory(ArithToAMDGPU) +add_subdirectory(ArithToAPFloat) add_subdirectory(ArithToArmSME) add_subdirectory(ArithToEmitC) add_subdirectory(ArithToLLVM) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 69a317ecd101f..c747e1b59558a 100644 --- 
a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -1654,6 +1654,20 @@ class VectorPrintOpConversion : public ConvertOpToLLVMPattern { return failure(); } } + } else if (auto floatTy = dyn_cast(printType)) { + // Print other floating-point types using the APFloat runtime library. + int32_t sem = + llvm::APFloatBase::SemanticsToEnum(floatTy.getFloatSemantics()); + Value semValue = LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI32Type(), + rewriter.getIntegerAttr(rewriter.getI32Type(), sem)); + Value floatBits = + LLVM::ZExtOp::create(rewriter, loc, rewriter.getI64Type(), value); + printer = + LLVM::lookupOrCreateApFloatPrintFn(rewriter, parent, symbolTables); + emitCall(rewriter, loc, printer.value(), + ValueRange({semValue, floatBits})); + return success(); } else { return failure(); } diff --git a/mlir/lib/Dialect/Func/Utils/Utils.cpp b/mlir/lib/Dialect/Func/Utils/Utils.cpp index b4cb0932ef631..d6dfd0229963c 100644 --- a/mlir/lib/Dialect/Func/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Func/Utils/Utils.cpp @@ -254,3 +254,28 @@ func::deduplicateArgsOfFuncOp(RewriterBase &rewriter, func::FuncOp funcOp, return std::make_pair(*newFuncOpOrFailure, newCallOp); } + +FailureOr +func::lookupFnDecl(SymbolOpInterface symTable, StringRef name, + FunctionType funcT, SymbolTableCollection *symbolTables) { + FuncOp func; + if (symbolTables) { + func = symbolTables->lookupSymbolIn( + symTable, StringAttr::get(symTable->getContext(), name)); + } else { + func = llvm::dyn_cast_or_null( + SymbolTable::lookupSymbolIn(symTable, name)); + } + + if (!func) + return func; + + mlir::FunctionType foundFuncT = func.getFunctionType(); + // Assert the signature of the found function is same as expected + if (funcT != foundFuncT) { + return func.emitError("matched function '") + << name << "' but with different type: " << foundFuncT + << " (expected " << funcT << ")"; + } + return func; +} diff --git 
a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp index feaffa34897b6..160b6ae89215c 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp @@ -30,6 +30,7 @@ static constexpr llvm::StringRef kPrintF16 = "printF16"; static constexpr llvm::StringRef kPrintBF16 = "printBF16"; static constexpr llvm::StringRef kPrintF32 = "printF32"; static constexpr llvm::StringRef kPrintF64 = "printF64"; +static constexpr llvm::StringRef kPrintApFloat = "printApFloat"; static constexpr llvm::StringRef kPrintString = "printString"; static constexpr llvm::StringRef kPrintOpen = "printOpen"; static constexpr llvm::StringRef kPrintClose = "printClose"; @@ -160,6 +161,16 @@ mlir::LLVM::lookupOrCreatePrintF64Fn(OpBuilder &b, Operation *moduleOp, LLVM::LLVMVoidType::get(moduleOp->getContext()), symbolTables); } +FailureOr +mlir::LLVM::lookupOrCreateApFloatPrintFn(OpBuilder &b, Operation *moduleOp, + SymbolTableCollection *symbolTables) { + return lookupOrCreateReservedFn( + b, moduleOp, kPrintApFloat, + {IntegerType::get(moduleOp->getContext(), 32), + IntegerType::get(moduleOp->getContext(), 64)}, + LLVM::LLVMVoidType::get(moduleOp->getContext()), symbolTables); +} + static LLVM::LLVMPointerType getCharPtr(MLIRContext *context) { return LLVM::LLVMPointerType::get(context); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index e0c25ab6cdef7..0f7b3638fb30d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -1517,6 +1517,15 @@ LogicalResult NVVM::BarrierOp::verify() { if (getNumberOfThreads() && !getBarrierId()) return emitOpError( "barrier id is missing, it should be set between 0 to 15"); + + if (getBarrierId() && (getReductionOp() || getReductionPredicate())) + return emitOpError("reduction are only available when id is 0"); + + if ((getReductionOp() && 
!getReductionPredicate()) || + (!getReductionOp() && getReductionPredicate())) + return emitOpError("reduction predicate and reduction operation must be " + "specified together"); + return success(); } @@ -1785,6 +1794,39 @@ std::string NVVM::MBarrierTryWaitParityOp::getPtx() { // getIntrinsicID/getIntrinsicIDAndArgs methods //===----------------------------------------------------------------------===// +mlir::NVVM::IDArgPair NVVM::BarrierOp::getIntrinsicIDAndArgs( + Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { + auto thisOp = cast(op); + llvm::Value *barrierId = thisOp.getBarrierId() + ? mt.lookupValue(thisOp.getBarrierId()) + : builder.getInt32(0); + llvm::Intrinsic::ID id; + llvm::SmallVector args; + if (thisOp.getNumberOfThreads()) { + id = llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_count; + args.push_back(barrierId); + args.push_back(mt.lookupValue(thisOp.getNumberOfThreads())); + } else if (thisOp.getReductionOp()) { + switch (*thisOp.getReductionOp()) { + case NVVM::BarrierReduction::AND: + id = llvm::Intrinsic::nvvm_barrier0_and; + break; + case NVVM::BarrierReduction::OR: + id = llvm::Intrinsic::nvvm_barrier0_or; + break; + case NVVM::BarrierReduction::POPC: + id = llvm::Intrinsic::nvvm_barrier0_popc; + break; + } + args.push_back(mt.lookupValue(thisOp.getReductionPredicate())); + } else { + id = llvm::Intrinsic::nvvm_barrier_cta_sync_aligned_all; + args.push_back(barrierId); + } + + return {id, std::move(args)}; +} + mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs( Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { auto thisOp = cast(op); diff --git a/mlir/lib/ExecutionEngine/APFloatWrappers.cpp b/mlir/lib/ExecutionEngine/APFloatWrappers.cpp new file mode 100644 index 0000000000000..0a05f7369e556 --- /dev/null +++ b/mlir/lib/ExecutionEngine/APFloatWrappers.cpp @@ -0,0 +1,89 @@ +//===- APFloatWrappers.cpp - Software Implementation of FP Arithmetics --- ===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes the APFloat infrastructure to MLIR programs as a runtime +// library. APFloat is a software implementation of floating-point arithmetic. +// +// On the MLIR side, floating-point values must be bitcasted to 64-bit integers +// before calling a runtime function. If a floating-point type has fewer than +// 64 bits, it must be zero-extended to 64 bits after bitcasting it to an +// integer. +// +// Runtime functions receive the floating-point operands of the arithmetic +// operation in the form of 64-bit integers, along with the APFloat semantics +// in the form of a 32-bit integer, which will be interpreted as an +// APFloatBase::Semantics enum value. +// +#include "llvm/ADT/APFloat.h" + +#ifdef _WIN32 +#ifndef MLIR_APFLOAT_WRAPPERS_EXPORT +#ifdef mlir_apfloat_wrappers_EXPORTS +// We are building this library +#define MLIR_APFLOAT_WRAPPERS_EXPORT __declspec(dllexport) +#else +// We are using this library +#define MLIR_APFLOAT_WRAPPERS_EXPORT __declspec(dllimport) +#endif // mlir_apfloat_wrappers_EXPORTS +#endif // MLIR_APFLOAT_WRAPPERS_EXPORT +#else +// Non-windows: use visibility attributes. +#define MLIR_APFLOAT_WRAPPERS_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 + +/// Binary operations without rounding mode.
+#define APFLOAT_BINARY_OP(OP) \ + MLIR_APFLOAT_WRAPPERS_EXPORT int64_t _mlir_apfloat_##OP( \ + int32_t semantics, uint64_t a, uint64_t b) { \ + const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics( \ + static_cast(semantics)); \ + unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem); \ + llvm::APFloat lhs(sem, llvm::APInt(bitWidth, a)); \ + llvm::APFloat rhs(sem, llvm::APInt(bitWidth, b)); \ + lhs.OP(rhs); \ + return lhs.bitcastToAPInt().getZExtValue(); \ + } + +/// Binary operations with rounding mode. +#define APFLOAT_BINARY_OP_ROUNDING_MODE(OP, ROUNDING_MODE) \ + MLIR_APFLOAT_WRAPPERS_EXPORT int64_t _mlir_apfloat_##OP( \ + int32_t semantics, uint64_t a, uint64_t b) { \ + const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics( \ + static_cast(semantics)); \ + unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem); \ + llvm::APFloat lhs(sem, llvm::APInt(bitWidth, a)); \ + llvm::APFloat rhs(sem, llvm::APInt(bitWidth, b)); \ + lhs.OP(rhs, ROUNDING_MODE); \ + return lhs.bitcastToAPInt().getZExtValue(); \ + } + +extern "C" { + +#define BIN_OPS_WITH_ROUNDING(X) \ + X(add, llvm::RoundingMode::NearestTiesToEven) \ + X(subtract, llvm::RoundingMode::NearestTiesToEven) \ + X(multiply, llvm::RoundingMode::NearestTiesToEven) \ + X(divide, llvm::RoundingMode::NearestTiesToEven) + +BIN_OPS_WITH_ROUNDING(APFLOAT_BINARY_OP_ROUNDING_MODE) +#undef BIN_OPS_WITH_ROUNDING +#undef APFLOAT_BINARY_OP_ROUNDING_MODE + +APFLOAT_BINARY_OP(remainder) + +#undef APFLOAT_BINARY_OP + +MLIR_APFLOAT_WRAPPERS_EXPORT void printApFloat(int32_t semantics, uint64_t a) { + const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics( + static_cast(semantics)); + unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem); + llvm::APFloat x(sem, llvm::APInt(bitWidth, a)); + double d = x.convertToDouble(); + fprintf(stdout, "%lg", d); +} +} diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index 
fdeb4dacf9278..0045675bcb448 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -2,6 +2,7 @@ # is a big dependency which most don't need. set(LLVM_OPTIONAL_SOURCES + APFloatWrappers.cpp ArmRunnerUtils.cpp ArmSMEStubs.cpp AsyncRuntime.cpp @@ -167,6 +168,20 @@ if(LLVM_ENABLE_PIC) set_property(TARGET mlir_float16_utils PROPERTY CXX_STANDARD 17) target_compile_definitions(mlir_float16_utils PRIVATE mlir_float16_utils_EXPORTS) + add_mlir_library(mlir_apfloat_wrappers + SHARED + APFloatWrappers.cpp + + EXCLUDE_FROM_LIBMLIR + ) + set_target_properties( + mlir_apfloat_wrappers + PROPERTIES CXX_STANDARD 17 + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON + ) + target_compile_definitions(mlir_apfloat_wrappers PRIVATE mlir_apfloat_wrappers_EXPORTS) + add_subdirectory(SparseTensor) add_mlir_library(mlir_c_runner_utils @@ -177,6 +192,7 @@ if(LLVM_ENABLE_PIC) EXCLUDE_FROM_LIBMLIR LINK_LIBS PUBLIC + mlir_apfloat_wrappers mlir_float16_utils MLIRSparseTensorEnums MLIRSparseTensorRuntime @@ -191,6 +207,7 @@ if(LLVM_ENABLE_PIC) EXCLUDE_FROM_LIBMLIR LINK_LIBS PUBLIC + mlir_apfloat_wrappers mlir_float16_utils ) target_compile_definitions(mlir_runner_utils PRIVATE mlir_runner_utils_EXPORTS) diff --git a/mlir/lib/Reducer/ReductionTreePass.cpp b/mlir/lib/Reducer/ReductionTreePass.cpp index 5b49204013cc0..1e00ed645f71e 100644 --- a/mlir/lib/Reducer/ReductionTreePass.cpp +++ b/mlir/lib/Reducer/ReductionTreePass.cpp @@ -175,9 +175,12 @@ class ReductionPatternInterfaceCollection using Base::Base; // Collect the reduce patterns defined by each dialect. 
- void populateReductionPatterns(RewritePatternSet &pattern) const { - for (const DialectReductionPatternInterface &interface : *this) + void populateReductionPatterns(RewritePatternSet &pattern, + Tester &tester) const { + for (const DialectReductionPatternInterface &interface : *this) { interface.populateReductionPatterns(pattern); + interface.populateReductionPatternsWithTester(pattern, tester); + } } }; @@ -201,15 +204,21 @@ class ReductionTreePass private: LogicalResult reduceOp(ModuleOp module, Region ®ion); + Tester tester; FrozenRewritePatternSet reducerPatterns; }; } // namespace LogicalResult ReductionTreePass::initialize(MLIRContext *context) { + tester.setTestScript(testerName); + tester.setTestScriptArgs(testerArgs); + RewritePatternSet patterns(context); + ReductionPatternInterfaceCollection reducePatternCollection(context); - reducePatternCollection.populateReductionPatterns(patterns); + reducePatternCollection.populateReductionPatterns(patterns, tester); + reducerPatterns = std::move(patterns); return success(); } @@ -244,11 +253,10 @@ void ReductionTreePass::runOnOperation() { } LogicalResult ReductionTreePass::reduceOp(ModuleOp module, Region ®ion) { - Tester test(testerName, testerArgs); switch (traversalModeId) { case TraversalMode::SinglePath: return findOptimal>( - module, region, reducerPatterns, test); + module, region, reducerPatterns, tester); default: return module.emitError() << "unsupported traversal mode detected"; } diff --git a/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir b/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir new file mode 100644 index 0000000000000..797f42c37a26f --- /dev/null +++ b/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir @@ -0,0 +1,128 @@ +// RUN: mlir-opt %s --convert-arith-to-apfloat -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK-LABEL: func.func private @_mlir_apfloat_add(i32, i64, i64) -> i64 + +// CHECK-LABEL: func.func @foo() -> f8E4M3FN { +// CHECK: 
%[[CONSTANT_0:.*]] = arith.constant 2.250000e+00 : f8E4M3FN +// CHECK: return %[[CONSTANT_0]] : f8E4M3FN +// CHECK: } + +// CHECK-LABEL: func.func @bar() -> f6E3M2FN { +// CHECK: %[[CONSTANT_0:.*]] = arith.constant 3.000000e+00 : f6E3M2FN +// CHECK: return %[[CONSTANT_0]] : f6E3M2FN +// CHECK: } + +// Illustrate that both f8E4M3FN and f6E3M2FN calling the same _mlir_apfloat_add is fine +// because each gets its own semantics enum and gets bitcast/extui/trunci to its own width. +// CHECK-LABEL: func.func @full_example() { +// CHECK: %[[CONSTANT_0:.*]] = arith.constant 1.375000e+00 : f8E4M3FN +// CHECK: %[[VAL_0:.*]] = call @foo() : () -> f8E4M3FN +// CHECK: %[[BITCAST_0:.*]] = arith.bitcast %[[CONSTANT_0]] : f8E4M3FN to i8 +// CHECK: %[[EXTUI_0:.*]] = arith.extui %[[BITCAST_0]] : i8 to i64 +// CHECK: %[[BITCAST_1:.*]] = arith.bitcast %[[VAL_0]] : f8E4M3FN to i8 +// CHECK: %[[EXTUI_1:.*]] = arith.extui %[[BITCAST_1]] : i8 to i64 +// // fltSemantics semantics for f8E4M3FN +// CHECK: %[[CONSTANT_1:.*]] = arith.constant 10 : i32 +// CHECK: %[[VAL_1:.*]] = call @_mlir_apfloat_add(%[[CONSTANT_1]], %[[EXTUI_0]], %[[EXTUI_1]]) : (i32, i64, i64) -> i64 +// CHECK: %[[TRUNCI_0:.*]] = arith.trunci %[[VAL_1]] : i64 to i8 +// CHECK: %[[BITCAST_2:.*]] = arith.bitcast %[[TRUNCI_0]] : i8 to f8E4M3FN +// CHECK: vector.print %[[BITCAST_2]] : f8E4M3FN + +// CHECK: %[[CONSTANT_2:.*]] = arith.constant 2.500000e+00 : f6E3M2FN +// CHECK: %[[VAL_2:.*]] = call @bar() : () -> f6E3M2FN +// CHECK: %[[BITCAST_3:.*]] = arith.bitcast %[[CONSTANT_2]] : f6E3M2FN to i6 +// CHECK: %[[EXTUI_2:.*]] = arith.extui %[[BITCAST_3]] : i6 to i64 +// CHECK: %[[BITCAST_4:.*]] = arith.bitcast %[[VAL_2]] : f6E3M2FN to i6 +// CHECK: %[[EXTUI_3:.*]] = arith.extui %[[BITCAST_4]] : i6 to i64 +// // fltSemantics semantics for f6E3M2FN +// CHECK: %[[CONSTANT_3:.*]] = arith.constant 16 : i32 +// CHECK: %[[VAL_3:.*]] = call @_mlir_apfloat_add(%[[CONSTANT_3]], %[[EXTUI_2]], %[[EXTUI_3]]) : (i32, i64, i64) -> i64 +// CHECK: 
%[[TRUNCI_1:.*]] = arith.trunci %[[VAL_3]] : i64 to i6 +// CHECK: %[[BITCAST_5:.*]] = arith.bitcast %[[TRUNCI_1]] : i6 to f6E3M2FN +// CHECK: vector.print %[[BITCAST_5]] : f6E3M2FN +// CHECK: return +// CHECK: } + +// Put rhs into separate function so that it won't be constant-folded. +func.func @foo() -> f8E4M3FN { + %cst = arith.constant 2.2 : f8E4M3FN + return %cst : f8E4M3FN +} + +func.func @bar() -> f6E3M2FN { + %cst = arith.constant 3.2 : f6E3M2FN + return %cst : f6E3M2FN +} + +func.func @full_example() { + %a = arith.constant 1.4 : f8E4M3FN + %b = func.call @foo() : () -> (f8E4M3FN) + %c = arith.addf %a, %b : f8E4M3FN + vector.print %c : f8E4M3FN + + %d = arith.constant 2.4 : f6E3M2FN + %e = func.call @bar() : () -> (f6E3M2FN) + %f = arith.addf %d, %e : f6E3M2FN + vector.print %f : f6E3M2FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_add(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_add(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @addf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.addf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// Test decl collision (different type) +// expected-error@+1{{matched function '_mlir_apfloat_add' but with different type: '(i32, i32, f32) -> index' (expected '(i32, i64, i64) -> i64')}} +func.func private @_mlir_apfloat_add(i32, i32, f32) -> index +func.func @addf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.addf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_subtract(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_subtract(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @subf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.subf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_multiply(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 
+// CHECK: call @_mlir_apfloat_multiply(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @subf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.mulf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_divide(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_divide(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @subf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.divf %arg0, %arg1 : f4E2M1FN + return +} + +// ----- + +// CHECK: func.func private @_mlir_apfloat_remainder(i32, i64, i64) -> i64 +// CHECK: %[[sem:.*]] = arith.constant 18 : i32 +// CHECK: call @_mlir_apfloat_remainder(%[[sem]], %{{.*}}, %{{.*}}) : (i32, i64, i64) -> i64 +func.func @remf(%arg0: f4E2M1FN, %arg1: f4E2M1FN) { + %0 = arith.remf %arg0, %arg1 : f4E2M1FN + return +} diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir new file mode 100644 index 0000000000000..2768afe0834b5 --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir @@ -0,0 +1,36 @@ +// Case 1: All floating-point arithmetics is lowered through APFloat. +// RUN: mlir-opt %s --convert-arith-to-apfloat --convert-to-llvm | \ +// RUN: mlir-runner -e entry --entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils \ +// RUN: --shared-libs=%mlir_apfloat_wrappers | FileCheck %s + +// Case 2: Only unsupported arithmetics (f8E4M3FN) is lowered through APFloat. +// Arithmetics on f32 is lowered directly to LLVM. +// RUN: mlir-opt %s --convert-to-llvm --convert-arith-to-apfloat \ +// RUN: --convert-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-runner -e entry --entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils \ +// RUN: --shared-libs=%mlir_apfloat_wrappers | FileCheck %s + +// Put rhs into separate function so that it won't be constant-folded. 
+func.func @foo() -> (f8E4M3FN, f32) { + %cst1 = arith.constant 2.2 : f8E4M3FN + %cst2 = arith.constant 2.2 : f32 + return %cst1, %cst2 : f8E4M3FN, f32 +} + +func.func @entry() { + %a1 = arith.constant 1.4 : f8E4M3FN + %a2 = arith.constant 1.4 : f32 + %b1, %b2 = func.call @foo() : () -> (f8E4M3FN, f32) + %c1 = arith.addf %a1, %b1 : f8E4M3FN // not supported by LLVM + %c2 = arith.addf %a2, %b2 : f32 // supported by LLVM + + // CHECK: 3.5 + vector.print %c1 : f8E4M3FN + + // CHECK: 3.6 + vector.print %c2 : f32 + + return +} diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir index 127ab70cb4539..610ed63168d87 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir @@ -24,17 +24,14 @@ func.func @main() { %d5x = tensor.cast %c5x : tensor<5xf32> to tensor %d4x = tensor.cast %c4x : tensor<4xf32> to tensor - // CHECK-NOT: ERROR: Runtime op verification failed - func.call @simple_add(%d5x, %d5x) : (tensor, tensor) -> (tensor) - // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size func.call @simple_add(%d5x, %d4x) : (tensor, tensor) -> (tensor) // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size func.call @simple_add(%d4x, %d5x) : (tensor, tensor) -> (tensor) %c1x1 = arith.constant dense<0.0> : tensor<1x1xf32> @@ -48,71 +45,81 @@ func.func @main() { %d4x5 = tensor.cast %c4x5 : 
tensor<4x5xf32> to tensor %d5x4 = tensor.cast %c5x4 : tensor<5x4xf32> to tensor - // CHECK-NOT: ERROR: Runtime op verification failed - func.call @broadcast_add(%d1x1, %d1x1) : (tensor, tensor) -> (tensor) - - // CHECK-NOT: ERROR: Runtime op verification failed - func.call @broadcast_add(%d1x1, %d4x5) : (tensor, tensor) -> (tensor) - - // CHECK-NOT: ERROR: Runtime op verification failed - func.call @broadcast_add(%d4x4, %d1x4) : (tensor, tensor) -> (tensor) + // CHECK: ERROR: Runtime op verification failed + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size func.call @broadcast_add(%d1x4, %d4x5) : (tensor, tensor) -> (tensor) // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size + // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size func.call @broadcast_add(%d5x4, 
%d4x5) : (tensor, tensor) -> (tensor) - // CHECK-NOT: ERROR: Runtime op verification failed - func.call @matmul_generic(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) - // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.generic + // CHECK-NEXT: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size func.call @matmul_generic(%d4x5, %d4x5) : (tensor, tensor) -> (tensor) - // CHECK-NOT: ERROR: Runtime op verification failed - func.call @matmul_named(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) - // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.matmul - // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK-NEXT: linalg.matmul + // CHECK-NEXT: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size func.call @matmul_named(%d4x5, %d4x5) : (tensor, tensor) -> (tensor) %c64x57 = arith.constant dense<0.0> : tensor<16x29xf32> %c3x4 = arith.constant dense<0.0> : tensor<3x4xf32> + // TODO: BROKEN CHK: ERROR: Runtime op verification failed + // TODO: BROKEN CHK-NEXT: linalg.generic + // TODO: BROKEN CHK-NEXT: unexpected negative result on dimension #0 of input/output operand #0 + // TODO: BROKEN func.call @reverse_from_3(%d5x) : (tensor) -> (tensor) + + %c0x = arith.constant dense<1.0> : tensor<0xf32> + %d0x = tensor.cast %c0x : tensor<0xf32> to tensor + + %c0x5 = arith.constant dense<0.0> : tensor<0x5xf32> + %d0x5 = tensor.cast %c0x5 : tensor<0x5xf32> to tensor + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @fill_empty_1d(%d0x) : (tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @simple_add(%d5x, %d5x) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @fill_empty_2d(%d0x5) : (tensor) -> (tensor) 
+ // CHECK-NOT: ERROR: Runtime op verification failed func.call @conv(%c64x57, %c3x4) : (tensor<16x29xf32>, tensor<3x4xf32>) -> (tensor<5x7xf32>) // CHECK-NOT: ERROR: Runtime op verification failed func.call @reverse_from_3(%d4x) : (tensor) -> (tensor) - // CHECK: ERROR: Runtime op verification failed - // CHECK: linalg.generic - // CHECK: unexpected negative result on dimension #0 of input/output operand #0 - func.call @reverse_from_3(%d5x) : (tensor) -> (tensor) + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @matmul_named(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) - %c0x = arith.constant dense<1.0> : tensor<0xf32> - %d0x = tensor.cast %c0x : tensor<0xf32> to tensor // CHECK-NOT: ERROR: Runtime op verification failed - func.call @fill_empty_1d(%d0x) : (tensor) -> (tensor) + func.call @matmul_generic(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) - %c0x5 = arith.constant dense<0.0> : tensor<0x5xf32> - %d0x5 = tensor.cast %c0x5 : tensor<0x5xf32> to tensor + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d1x1, %d1x1) : (tensor, tensor) -> (tensor) // CHECK-NOT: ERROR: Runtime op verification failed - func.call @fill_empty_2d(%d0x5) : (tensor) -> (tensor) + func.call @broadcast_add(%d1x1, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d4x4, %d1x4) : (tensor, tensor) -> (tensor) return } diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir new file mode 100644 index 0000000000000..d89f93101c1fc --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir @@ -0,0 +1,20 @@ +// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s + +// CHECK-LABEL: @llvm_nvvm_barrier( +// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[redOperand:.*]]) +llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32, %redOperand : i32) { + // CHECK: call void 
@llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) + nvvm.barrier + // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]]) + nvvm.barrier id = %barID + // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]]) + nvvm.barrier id = %barID number_of_threads = %numberOfThreads + // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[redOperand]]) + %0 = nvvm.barrier #nvvm.reduction %redOperand -> i32 + // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[redOperand]]) + %1 = nvvm.barrier #nvvm.reduction %redOperand -> i32 + // CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[redOperand]]) + %2 = nvvm.barrier #nvvm.reduction %redOperand -> i32 + + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index fec54cbf5e3e5..5cba5c4fceefd 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -166,25 +166,6 @@ llvm.func @nvvm_rcp(%0: f32) -> f32 { llvm.return %1 : f32 } -// CHECK-LABEL: @llvm_nvvm_barrier0 -llvm.func @llvm_nvvm_barrier0() { - // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) - nvvm.barrier0 - llvm.return -} - -// CHECK-LABEL: @llvm_nvvm_barrier( -// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]]) -llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) { - // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0) - nvvm.barrier - // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]]) - nvvm.barrier id = %barID - // CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]]) - nvvm.barrier id = %barID number_of_threads = %numberOfThreads - llvm.return -} - // CHECK-LABEL: @llvm_nvvm_cluster_arrive llvm.func @llvm_nvvm_cluster_arrive() { // CHECK: call void @llvm.nvvm.barrier.cluster.arrive() diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index 6ff12d66523f5..4a38ed605be0c 100644 --- 
a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -208,6 +208,7 @@ def find_real_python_interpreter(): add_runtime("mlir_c_runner_utils"), add_runtime("mlir_async_runtime"), add_runtime("mlir_float16_utils"), + add_runtime("mlir_apfloat_wrappers"), "mlir-linalg-ods-yaml-gen", "mlir-reduce", "mlir-pdll",