diff --git a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h index 95216368492ca..d72d021f44838 100644 --- a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h +++ b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h @@ -137,7 +137,7 @@ class ExpandModularHeadersPPCallbacks : public PPCallbacks { std::unique_ptr PP; bool EnteredMainFile = false; bool StartedLexing = false; - Token CurrentToken; + Token CurrentToken = Token(); }; } // namespace tooling diff --git a/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp index adf2d2b4bcc07..38a0234337756 100644 --- a/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/FloatLoopCounterCheck.cpp @@ -31,6 +31,7 @@ void FloatLoopCounterCheck::registerMatchers(MatchFinder *Finder) { void FloatLoopCounterCheck::check(const MatchFinder::MatchResult &Result) { const auto *FS = Result.Nodes.getNodeAs("for"); + assert(FS && "FS should not be null"); diag(FS->getInc()->getBeginLoc(), "loop induction expression should not have " "floating-point type") diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp index 54c4692923949..cf4b445a554e8 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsAvoidUncheckedContainerAccessCheck.cpp @@ -176,7 +176,7 @@ void ProBoundsAvoidUncheckedContainerAccessCheck::check( } } else if (const auto *MCE = dyn_cast(MatchedExpr)) { // Case: a.operator[](i) or a->operator[](i) - const auto *Callee = dyn_cast(MCE->getCallee()); + const auto *Callee = cast(MCE->getCallee()); if (FixMode == At) { // Cases: 
a.operator[](i) => a.at(i) and a->operator[](i) => a->at(i) diff --git a/clang/include/clang/AST/ASTConsumer.h b/clang/include/clang/AST/ASTConsumer.h index 447f2592d2359..a1ef187ee2069 100644 --- a/clang/include/clang/AST/ASTConsumer.h +++ b/clang/include/clang/AST/ASTConsumer.h @@ -27,6 +27,7 @@ namespace clang { class VarDecl; class FunctionDecl; class ImportDecl; + class OpenACCRoutineDecl; /// ASTConsumer - This is an abstract interface that should be implemented by /// clients that read ASTs. This abstraction layer allows the client to be @@ -116,6 +117,11 @@ class ASTConsumer { // variable has been instantiated. virtual void HandleCXXStaticMemberVarInstantiation(VarDecl *D) {} + /// Callback to handle the end-of-translation unit attachment of OpenACC + /// routine declaration information. + virtual void HandleOpenACCRoutineReference(const FunctionDecl *FD, + const OpenACCRoutineDecl *RD) {} + /// Callback involved at the end of a translation unit to /// notify the consumer that a vtable for the given C++ class is /// required. 
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 51b7dd16e5195..77531c31538c1 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -783,15 +783,15 @@ let params = T.Unsigned in { } let params = T.Float in { def vminnmq: Intrinsic $a, $b)>; + (fminnm $a, $b)>; def vmaxnmq: Intrinsic $a, $b)>; + (fmaxnm $a, $b)>; def vminnmaq: Intrinsic + (fminnm (IRIntBase<"fabs", [Vector]> $a), (IRIntBase<"fabs", [Vector]> $b))>; def vmaxnmaq: Intrinsic + (fmaxnm (IRIntBase<"fabs", [Vector]> $a), (IRIntBase<"fabs", [Vector]> $b))>; } diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 3714262898476..3210549d0cb56 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -589,6 +589,10 @@ def fsub: strictFPAlt>; def fmul: strictFPAlt>; +def fminnm : strictFPAlt, + IRInt<"vminnm", [Vector]>>; +def fmaxnm : strictFPAlt, + IRInt<"vmaxnm", [Vector]>>; // ----------------------------------------------------------------------------- // Convenience lists of parameter types. 
'T' is just a container record, so you diff --git a/clang/include/clang/CIR/CIRGenerator.h b/clang/include/clang/CIR/CIRGenerator.h index 5ea11463ffa9f..31dead2d7b585 100644 --- a/clang/include/clang/CIR/CIRGenerator.h +++ b/clang/include/clang/CIR/CIRGenerator.h @@ -81,6 +81,9 @@ class CIRGenerator : public clang::ASTConsumer { void HandleTagDeclDefinition(clang::TagDecl *d) override; void HandleTagDeclRequiredDefinition(const clang::TagDecl *D) override; void HandleCXXStaticMemberVarInstantiation(clang::VarDecl *D) override; + void + HandleOpenACCRoutineReference(const clang::FunctionDecl *FD, + const clang::OpenACCRoutineDecl *RD) override; void CompleteTentativeDefinition(clang::VarDecl *d) override; void HandleVTable(clang::CXXRecordDecl *rd) override; diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 4b64fc56c57ad..ae199f35cb10e 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1173,6 +1173,35 @@ def CIR_SwitchOp : CIR_Op<"switch", [ let hasLLVMLowering = false; } +//===----------------------------------------------------------------------===// +// IsConstantOp +//===----------------------------------------------------------------------===// + +def CIR_IsConstantOp : CIR_Op<"is_constant", [Pure]> { + let summary = "Test for manifest compile-time constant"; + let description = [{ + Returns `true` if the argument is known to be a manifest compile-time + constant otherwise returns `false`. If the argument is a constant expression + which refers to a global (the address of which _is_ a constant, but not + manifest during the compile), then the intrinsic evaluates to `false`. + + This is used to represent `__builtin_constant_p` in cases where the argument + isn't known to be constant during initial translation of the source code but + might be proven to be constant after later optimizations. 
+ + Example: + ``` + %1 = cir.is_constant %2 : !s32i -> !cir.bool + ``` + }]; + let arguments = (ins CIR_AnyType:$val); + let results = (outs CIR_BoolType:$result); + + let assemblyFormat = [{ + $val `:` qualified(type($val)) `->` qualified(type($result)) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // SwitchFlatOp //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 28c609bb8524d..84e3b4d4e59e2 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4870,25 +4870,25 @@ def ggdb3 : Flag<["-"], "ggdb3">, Group; def glldb : Flag<["-"], "glldb">, Group; def gsce : Flag<["-"], "gsce">, Group; def gdbx : Flag<["-"], "gdbx">, Group; -// Equivalent to our default dwarf version. Forces usual dwarf emission when +// Equivalent to our default DWARF version. Forces usual DWARF emission when // CodeView is enabled. 
def gdwarf : Flag<["-"], "gdwarf">, Group, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, - HelpText<"Generate source-level debug information with the default dwarf version">; + HelpText<"Generate source-level debug information with the default DWARF version">; let Visibility = [ClangOption, FlangOption] in { def gdwarf_2 : Flag<["-"], "gdwarf-2">, Group, - HelpText<"Generate source-level debug information with dwarf version 2">; + HelpText<"Generate source-level debug information with DWARF version 2">; def gdwarf_3 : Flag<["-"], "gdwarf-3">, Group, - HelpText<"Generate source-level debug information with dwarf version 3">; + HelpText<"Generate source-level debug information with DWARF version 3">; def gdwarf_4 : Flag<["-"], "gdwarf-4">, Group, - HelpText<"Generate source-level debug information with dwarf version 4">; + HelpText<"Generate source-level debug information with DWARF version 4">; def gdwarf_5 : Flag<["-"], "gdwarf-5">, Group, - HelpText<"Generate source-level debug information with dwarf version 5">; + HelpText<"Generate source-level debug information with DWARF version 5">; def gdwarf_6 : Flag<["-"], "gdwarf-6">, Group, - HelpText<"Generate source-level debug information with dwarf version 6">; + HelpText<"Generate source-level debug information with DWARF version 6">; } def gdwarf64 : Flag<["-"], "gdwarf64">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption]>, @@ -4915,7 +4915,7 @@ def gno_heterogeneous_dwarf : Flag<["-"], "gno-heterogeneous-dwarf">, HelpText<"Disable DWARF extensions for heterogeneous debugging">, Alias, AliasArgs<["disabled"]>; -def gcodeview : Flag<["-"], "gcodeview">, +def gcodeview : Flag<["-"], "gcodeview">, Group, HelpText<"Generate CodeView debug information">, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption, DXCOption]>, MarshallingInfoFlag>; @@ -4923,17 +4923,20 @@ defm codeview_ghash : BoolOption<"g", "codeview-ghash", CodeGenOpts<"CodeViewGHash">, DefaultFalse, PosFlag, - NegFlag, 
BothFlags<[], [ClangOption, CLOption, DXCOption]>>; + NegFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>, + Group; defm codeview_command_line : BoolOption<"g", "codeview-command-line", CodeGenOpts<"CodeViewCommandLine">, DefaultTrue, PosFlag, NegFlag, - BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option]>>; + BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option]>>, + Group; defm inline_line_tables : BoolGOption<"inline-line-tables", CodeGenOpts<"NoInlineLineTables">, DefaultFalse, NegFlag, - PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>; + PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>, + Group; def gfull : Flag<["-"], "gfull">, Group; def gused : Flag<["-"], "gused">, Group; @@ -4958,7 +4961,8 @@ defm strict_dwarf : BoolOption<"g", "strict-dwarf", defm omit_unreferenced_methods : BoolGOption<"omit-unreferenced-methods", CodeGenOpts<"DebugOmitUnreferencedMethods">, DefaultFalse, NegFlag, - PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>; + PosFlag, BothFlags<[], [ClangOption, CLOption, DXCOption]>>, + Group; defm column_info : BoolOption<"g", "column-info", CodeGenOpts<"DebugColumnInfo">, DefaultTrue, NegFlag, @@ -5027,6 +5031,7 @@ defm structor_decl_linkage_names "Attach linkage names to C++ constructor/destructor " "declarations in DWARF.">, BothFlags<[], [ClangOption, CLOption, CC1Option]>>, + Group, DocBrief<[{On some ABIs (e.g., Itanium), constructors and destructors may have multiple variants. Historically, when generating DWARF, Clang did not attach ``DW_AT_linkage_name`` to structor DIEs because there were multiple possible manglings (depending on the structor variant) that could be used. With ``-gstructor-decl-linkage-names``, for ABIs with structor variants, we attach a "unified" mangled name to structor declarations DIEs which debuggers can use to look up all the definitions for a structor declaration. 
E.g., a "unified" mangled name ``_ZN3FooC4Ev`` may have multiple definitions associated with it such as ``_ZN3FooC1Ev`` and ``_ZN3FooC2Ev``. Enabling this flag results in a better interactive debugging experience (both GDB and LLDB have support for understanding these "unified" linkage names). However, it comes with a significant increase in debug-info size (particularly the `.debug_str` section). As an escape hatch, users can disable this feature using ``-gno-structor-decl-linkage-names``.}]>; @@ -5035,7 +5040,8 @@ defm key_instructions : BoolGOption<"key-instructions", NegFlag, PosFlag, - BothFlags<[], [ClangOption, CLOption, CC1Option]>>; + BothFlags<[], [ClangOption, CLOption, CC1Option]>>, + Group; def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">; def help : Flag<["-", "--"], "help">, Visibility<[ClangOption, CC1Option, CC1AsOption, @@ -8690,7 +8696,7 @@ def main_file_name : Separate<["-"], "main-file-name">, Visibility<[CC1Option, CC1AsOption]>, MarshallingInfoString>; def split_dwarf_output : Separate<["-"], "split-dwarf-output">, - HelpText<"File name to use for split dwarf debug info output">, + HelpText<"File name to use for split DWARF debug info output">, Visibility<[CC1Option, CC1AsOption, FC1Option]>, MarshallingInfoString>; @@ -8724,7 +8730,7 @@ def dependent_lib : Joined<["--"], "dependent-lib=">, MarshallingInfoStringVector>; def split_dwarf_file : Separate<["-"], "split-dwarf-file">, - HelpText<"Name of the split dwarf debug info file to encode in the object file">, + HelpText<"Name of the split DWARF debug info file to encode in the object file">, MarshallingInfoString>; } // let Visibility = [CC1Option, FC1Option] diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index f751e985ae0ff..b5e3ecab36d22 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -37,8 +37,16 @@ class Scope; class SemaOpenACC : public SemaBase { public: 
using DeclGroupPtrTy = OpaquePtr; + using RoutineRefListTy = std::pair; private: + // We save a list of routine clauses that refer to a different function(that + // is, routine-with-a-name) so that we can do the emission at the 'end'. We + // have to do this, since functions can be emitted before they are referenced, + // and the OpenACCRoutineDecl isn't necessarily emitted, as it might be in a + // function/etc. So we do these emits at the end of the TU. + llvm::SmallVector RoutineRefList; + struct ComputeConstructInfo { /// Which type of compute construct we are inside of, which we can use to /// determine whether we should add loops to the above collection. We can @@ -752,6 +760,7 @@ class SemaOpenACC : public SemaBase { }; SemaOpenACC(Sema &S); + void ActOnEndOfTranslationUnit(TranslationUnitDecl *TU); // Called when we encounter a 'while' statement, before looking at its 'body'. void ActOnWhileStmt(SourceLocation WhileLoc); diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index a51d8d2375cfe..1d8f27ab915e2 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -316,8 +316,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { Opts["cl_amd_media_ops"] = true; Opts["cl_amd_media_ops2"] = true; + // FIXME: Check subtarget for image support. 
Opts["__opencl_c_images"] = true; Opts["__opencl_c_3d_image_writes"] = true; + Opts["__opencl_c_read_write_images"] = true; Opts["cl_khr_3d_image_writes"] = true; Opts["__opencl_c_program_scope_global_variables"] = true; Opts["__opencl_c_atomic_order_seq_cst"] = true; diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index e14b5f8aac337..12d93cf4c73c6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -542,6 +542,45 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e, returnValue); } + + case Builtin::BI__builtin_constant_p: { + mlir::Type resultType = convertType(e->getType()); + + const Expr *arg = e->getArg(0); + QualType argType = arg->getType(); + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. + if (!argType->isIntegralOrEnumerationType() && !argType->isFloatingType() && + !argType->isObjCObjectPointerType() && !argType->isBlockPointerType()) { + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get( + builder.getConstInt(getLoc(e->getSourceRange()), + mlir::cast(resultType), 0)); + } + + if (arg->HasSideEffects(getContext())) { + // The argument is unevaluated, so be conservative if it might have + // side-effects. 
+ return RValue::get( + builder.getConstInt(getLoc(e->getSourceRange()), + mlir::cast(resultType), 0)); + } + + mlir::Value argValue = emitScalarExpr(arg); + if (argType->isObjCObjectPointerType()) { + cgm.errorNYI(e->getSourceRange(), + "__builtin_constant_p: Obj-C object pointer"); + return {}; + } + argValue = builder.createBitcast(argValue, convertType(argType)); + + mlir::Value result = cir::IsConstantOp::create( + builder, getLoc(e->getSourceRange()), argValue); + // IsConstantOp returns a bool, but __builtin_constant_p returns an int. + result = builder.createBoolToInt(result, resultType); + return RValue::get(result); + } case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { unsigned type = diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 8d01b7dbd15f6..224a182ed17d1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -100,6 +100,44 @@ static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder, return builder.createBitcast(resVec, ops[0].getType()); } +static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder, + mlir::Location loc, + const std::string &intrinsicName, + SmallVectorImpl &ops) { + unsigned numElems = cast(ops[0].getType()).getWidth(); + + // Convert both operands to mask vectors. + mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems); + mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems); + + mlir::Type i32Ty = builder.getSInt32Ty(); + + // Create indices for extracting the first half of each vector. + SmallVector halfIndices; + for (auto i : llvm::seq(0, numElems / 2)) + halfIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Extract first half of each vector. This gives better codegen than + // doing it in a single shuffle. 
+ mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices); + mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices); + + // Create indices for concatenating the vectors. + // NOTE: Operands are swapped to match the intrinsic definition. + // After the half extraction, both vectors have numElems/2 elements. + // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1] + // select from rhsHalf, and indices [numElems/2..numElems-1] select from + // lhsHalf. + SmallVector concatIndices; + for (auto i : llvm::seq(0, numElems)) + concatIndices.push_back(cir::IntAttr::get(i32Ty, i)); + + // Concat the vectors (RHS first, then LHS). + mlir::Value res = + builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices); + return builder.createBitcast(res, ops[0].getType()); +} + static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder, mlir::Location loc, cir::BinOpKind binOpKind, @@ -257,7 +295,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1], ops[2]); } - + case X86::BI__builtin_ia32_kunpckhi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackb", ops); + case X86::BI__builtin_ia32_kunpcksi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackw", ops); + case X86::BI__builtin_ia32_kunpckdi: + return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()), + "x86.avx512.kunpackd", ops); case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { mlir::Location loc = getLoc(expr->getExprLoc()); @@ -947,9 +993,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts); return builder.createBitcast(resVec, ops[0].getType()); } - case X86::BI__builtin_ia32_kunpckdi: - case X86::BI__builtin_ia32_kunpcksi: - case X86::BI__builtin_ia32_kunpckhi: case 
X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index c98d9bb0724f6..ca9fe939139cd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -126,8 +126,7 @@ static void emitMemberInitializer(CIRGenFunction &cgf, lhs.isVolatileQualified()); // Ensure that we destroy the objects if an exception is thrown later in // the constructor. - QualType::DestructionKind dtorKind = fieldType.isDestructedType(); - assert(!cgf.needsEHCleanup(dtorKind) && + assert(!cgf.needsEHCleanup(fieldType.isDestructedType()) && "Arrays of non-record types shouldn't need EH cleanup"); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp index d52986db49ea6..a5322ac4e1930 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp @@ -287,9 +287,92 @@ void CIRGenModule::emitGlobalOpenACCDeclareDecl(const OpenACCDeclareDecl *d) { } void CIRGenFunction::emitOpenACCRoutine(const OpenACCRoutineDecl &d) { - getCIRGenModule().errorNYI(d.getSourceRange(), "OpenACC Routine Construct"); + // Do nothing here. The OpenACCRoutineDeclAttr handles the implicit name + // cases, and the end-of-TU handling manages the named cases. This is + // necessary because these references aren't necessarily emitted themselves, + // but can be named anywhere. } void CIRGenModule::emitGlobalOpenACCRoutineDecl(const OpenACCRoutineDecl *d) { - errorNYI(d->getSourceRange(), "OpenACC Global Routine Construct"); + // Do nothing here. The OpenACCRoutineDeclAttr handles the implicit name + // cases, and the end-of-TU handling manages the named cases. This is + // necessary because these references aren't necessarily emitted themselves, + // but can be named anywhere. 
+} + +namespace { +class OpenACCRoutineClauseEmitter final + : public OpenACCClauseVisitor { + CIRGen::CIRGenBuilderTy &builder; + mlir::acc::RoutineOp routineOp; + llvm::SmallVector lastDeviceTypeValues; + +public: + OpenACCRoutineClauseEmitter(CIRGen::CIRGenBuilderTy &builder, + mlir::acc::RoutineOp routineOp) + : builder(builder), routineOp(routineOp) {} + + void emitClauses(ArrayRef clauses) { + this->VisitClauseList(clauses); + } + + void VisitClause(const OpenACCClause &clause) { + llvm_unreachable("Invalid OpenACC clause on routine"); + } + + void VisitSeqClause(const OpenACCSeqClause &clause) { + routineOp.addSeq(builder.getContext(), lastDeviceTypeValues); + } + void VisitWorkerClause(const OpenACCWorkerClause &clause) { + routineOp.addWorker(builder.getContext(), lastDeviceTypeValues); + } + void VisitVectorClause(const OpenACCVectorClause &clause) { + routineOp.addVector(builder.getContext(), lastDeviceTypeValues); + } + + void VisitNoHostClause(const OpenACCNoHostClause &clause) { + routineOp.setNohost(/*attrValue=*/true); + } +}; +} // namespace + +void CIRGenModule::emitOpenACCRoutineDecl( + const clang::FunctionDecl *funcDecl, cir::FuncOp func, + SourceLocation pragmaLoc, ArrayRef clauses) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + // These need to appear at the global module. + builder.setInsertionPointToEnd(&getModule().getBodyRegion().front()); + + mlir::Location routineLoc = getLoc(pragmaLoc); + + std::stringstream routineNameSS; + // This follows the same naming format as Flang. + routineNameSS << "acc_routine_" << routineCounter++; + std::string routineName = routineNameSS.str(); + + // There isn't a good constructor for RoutineOp that just takes a location + + // name + function, so we use one that creates an otherwise RoutineOp and + // count on the visitor/emitter to fill these in. 
+ auto routineOp = mlir::acc::RoutineOp::create( + builder, routineLoc, routineName, + mlir::SymbolRefAttr::get(builder.getContext(), func.getName()), + /*implicit=*/false); + + // We have to add a pointer going the other direction via an acc.routine_info, + // from the func to the routine. + llvm::SmallVector funcRoutines; + if (auto routineInfo = + func.getOperation()->getAttrOfType( + mlir::acc::getRoutineInfoAttrName())) + funcRoutines.append(routineInfo.getAccRoutines().begin(), + routineInfo.getAccRoutines().end()); + + funcRoutines.push_back( + mlir::SymbolRefAttr::get(builder.getContext(), routineName)); + func.getOperation()->setAttr( + mlir::acc::getRoutineInfoAttrName(), + mlir::acc::RoutineInfoAttr::get(func.getContext(), funcRoutines)); + + OpenACCRoutineClauseEmitter emitter{builder, routineOp}; + emitter.emitClauses(clauses); } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 03bbfbffce717..1d8e4a3b444ee 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -2227,6 +2227,15 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, if (!cgf) theModule.push_back(func); + + if (this->getLangOpts().OpenACC) { + // We only have to handle this attribute, since OpenACCAnnotAttrs are + // handled via the end-of-TU work. + for (const auto *attr : + funcDecl->specific_attrs()) + emitOpenACCRoutineDecl(funcDecl, func, attr->getLocation(), + attr->Clauses); + } } return func; } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 6600d086f8f61..d7aee8ebf4d7a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -461,6 +461,12 @@ class CIRGenModule : public CIRGenTypeCache { OpenACCModifierKind modifiers, bool structured, bool implicit, bool requiresDtor); + // Each of the acc.routine operations must have a unique name, so we just use + // an integer counter. 
This is how Flang does it, so it seems reasonable. + unsigned routineCounter = 0; + void emitOpenACCRoutineDecl(const clang::FunctionDecl *funcDecl, + cir::FuncOp func, SourceLocation pragmaLoc, + ArrayRef clauses); // C++ related functions. void emitDeclContext(const DeclContext *dc); diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp b/clang/lib/CIR/CodeGen/CIRGenerator.cpp index aa4d9eba35c04..0208eeea7146a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenerator.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp @@ -166,6 +166,18 @@ void CIRGenerator::HandleCXXStaticMemberVarInstantiation(VarDecl *D) { cgm->handleCXXStaticMemberVarInstantiation(D); } +void CIRGenerator::HandleOpenACCRoutineReference(const FunctionDecl *FD, + const OpenACCRoutineDecl *RD) { + llvm::StringRef mangledName = cgm->getMangledName(FD); + cir::FuncOp entry = + mlir::dyn_cast_if_present(cgm->getGlobalValue(mangledName)); + + // if this wasn't generated, don't force it to be. + if (!entry) + return; + cgm->emitOpenACCRoutineDecl(FD, entry, RD->getBeginLoc(), RD->clauses()); +} + void CIRGenerator::CompleteTentativeDefinition(VarDecl *d) { if (diags.hasErrorOccurred()) return; diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp index 67bb5657d4001..daec8ae409e0f 100644 --- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp +++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp @@ -88,6 +88,11 @@ class CIRGenConsumer : public clang::ASTConsumer { Gen->HandleCXXStaticMemberVarInstantiation(VD); } + void HandleOpenACCRoutineReference(const FunctionDecl *FD, + const OpenACCRoutineDecl *RD) override { + Gen->HandleOpenACCRoutineReference(FD, RD); + } + void HandleInlineFunctionDefinition(FunctionDecl *D) override { Gen->HandleInlineFunctionDefinition(D); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 8e9780754f68f..40e14474890dc 100644 --- 
a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3979,6 +3979,13 @@ mlir::LogicalResult CIRToLLVMGetBitfieldOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMIsConstantOpLowering::matchAndRewrite( + cir::IsConstantOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp(op, adaptor.getVal()); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMInlineAsmOpLowering::matchAndRewrite( cir::InlineAsmOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 423be1826aa47..c2ee8d0fd1748 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6356,8 +6356,15 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { LValue CodeGenFunction::EmitHLSLArrayAssignLValue(const BinaryOperator *E) { // Don't emit an LValue for the RHS because it might not be an LValue LValue LHS = EmitLValue(E->getLHS()); + + // If the RHS is a global resource array, copy all individual resources + // into LHS. + if (E->getRHS()->getType()->isHLSLResourceRecordArray()) + if (CGM.getHLSLRuntime().emitResourceArrayCopy(LHS, E->getRHS(), *this)) + return LHS; + // In C the RHS of an assignment operator is an RValue. - // EmitAggregateAssign takes anan LValue for the RHS. Instead we can call + // EmitAggregateAssign takes an LValue for the RHS. Instead we can call // EmitInitializationToLValue to emit an RValue into an LValue. 
EmitInitializationToLValue(E->getRHS(), LHS); return LHS; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 0c3701eb01679..f485fdd49e43f 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -22,6 +22,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attrs.inc" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/HLSLResource.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" @@ -94,6 +95,14 @@ void addRootSignatureMD(llvm::dxbc::RootSignatureVersion RootSigVer, RootSignatureValMD->addOperand(MDVals); } +// Find array variable declaration from DeclRef expression +static const ValueDecl *getArrayDecl(const Expr *E) { + if (const DeclRefExpr *DRE = + dyn_cast_or_null(E->IgnoreImpCasts())) + return DRE->getDecl(); + return nullptr; +} + // Find array variable declaration from nested array subscript AST nodes static const ValueDecl *getArrayDecl(const ArraySubscriptExpr *ASE) { const Expr *E = nullptr; @@ -103,9 +112,7 @@ static const ValueDecl *getArrayDecl(const ArraySubscriptExpr *ASE) { return nullptr; ASE = dyn_cast(E); } - if (const DeclRefExpr *DRE = dyn_cast_or_null(E)) - return DRE->getDecl(); - return nullptr; + return getArrayDecl(E); } // Get the total size of the array, or -1 if the array is unbounded. @@ -1214,12 +1221,13 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( ArraySubsExpr->getType()->isHLSLResourceRecordArray()) && "expected resource array subscript expression"); - // Let clang codegen handle local resource array subscripts, + // Let clang codegen handle local and static resource array subscripts, // or when the subscript references on opaque expression (as part of // ArrayInitLoopExpr AST node). 
const VarDecl *ArrayDecl = dyn_cast_or_null(getArrayDecl(ArraySubsExpr)); - if (!ArrayDecl || !ArrayDecl->hasGlobalStorage()) + if (!ArrayDecl || !ArrayDecl->hasGlobalStorage() || + ArrayDecl->getStorageClass() == SC_Static) return std::nullopt; // get the resource array type @@ -1249,7 +1257,7 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( // Find binding info for the resource array. For implicit binding // an HLSLResourceBindingAttr should have been added by SemaHLSL. ResourceBindingAttrs Binding(ArrayDecl); - assert((Binding.hasBinding()) && + assert(Binding.hasBinding() && "resource array must have a binding attribute"); // Find the individual resource type. @@ -1305,6 +1313,49 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl); } +// If RHSExpr is a global resource array, initialize all of its resources and +// set them into LHS. Returns false if no copy has been performed and the +// array copy should be handled by Clang codegen. +bool CGHLSLRuntime::emitResourceArrayCopy(LValue &LHS, Expr *RHSExpr, + CodeGenFunction &CGF) { + QualType ResultTy = RHSExpr->getType(); + assert(ResultTy->isHLSLResourceRecordArray() && "expected resource array"); + + // Let Clang codegen handle local and static resource array copies. + const VarDecl *ArrayDecl = dyn_cast_or_null(getArrayDecl(RHSExpr)); + if (!ArrayDecl || !ArrayDecl->hasGlobalStorage() || + ArrayDecl->getStorageClass() == SC_Static) + return false; + + // Find binding info for the resource array. For implicit binding + // the HLSLResourceBindingAttr should have been added by SemaHLSL. + ResourceBindingAttrs Binding(ArrayDecl); + assert(Binding.hasBinding() && + "resource array must have a binding attribute"); + + // Find the individual resource type. 
+ ASTContext &AST = ArrayDecl->getASTContext(); + QualType ResTy = AST.getBaseElementType(ResultTy); + const auto *ResArrayTy = cast(ResultTy.getTypePtr()); + + // Use the provided LHS for the result. + AggValueSlot ValueSlot = AggValueSlot::forAddr( + LHS.getAddress(), Qualifiers(), AggValueSlot::IsDestructed_t(true), + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsAliased_t(false), + AggValueSlot::DoesNotOverlap); + + // Create Value for index and total array size (= range size). + int Size = getTotalArraySize(AST, ResArrayTy); + llvm::Value *Zero = llvm::ConstantInt::get(CGM.IntTy, 0); + llvm::Value *Range = llvm::ConstantInt::get(CGM.IntTy, Size); + + // Initialize individual resources in the array into LHS. + std::optional EndIndex = initializeLocalResourceArray( + CGF, ResTy->getAsCXXRecordDecl(), ResArrayTy, ValueSlot, Range, Zero, + ArrayDecl->getName(), Binding, {Zero}, RHSExpr->getExprLoc()); + return EndIndex.has_value(); +} + std::optional CGHLSLRuntime::emitBufferArraySubscriptExpr( const ArraySubscriptExpr *E, CodeGenFunction &CGF, llvm::function_ref EmitIdxAfterBase) { diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 77f43e8766745..c7cd668419d10 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -258,6 +258,7 @@ class CGHLSLRuntime { std::optional emitResourceArraySubscriptExpr(const ArraySubscriptExpr *E, CodeGenFunction &CGF); + bool emitResourceArrayCopy(LValue &LHS, Expr *RHSExpr, CodeGenFunction &CGF); std::optional emitBufferArraySubscriptExpr( const ArraySubscriptExpr *E, CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index eda65739771cf..8d14ab7d6dccd 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5983,7 +5983,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, (D->getType()->isHLSLResourceRecord() || 
D->getType()->isHLSLResourceRecordArray())) { Init = llvm::PoisonValue::get(getTypes().ConvertType(ASTTy)); - NeedsGlobalCtor = D->getType()->isHLSLResourceRecord(); + NeedsGlobalCtor = D->getType()->isHLSLResourceRecord() || + D->getStorageClass() == SC_Static; } else if (D->hasAttr()) { Init = llvm::UndefValue::get(getTypes().ConvertTypeForMem(ASTTy)); } else if (!InitExpr) { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 325f62cf33444..71e74613973c5 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1502,6 +1502,9 @@ void Sema::ActOnEndOfTranslationUnit() { if (LangOpts.HLSL) HLSL().ActOnEndOfTranslationUnit(getASTContext().getTranslationUnitDecl()); + if (LangOpts.OpenACC) + OpenACC().ActOnEndOfTranslationUnit( + getASTContext().getTranslationUnitDecl()); // If there were errors, disable 'unused' warnings since they will mostly be // noise. Don't warn for a use from a module: either we should warn on all diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 89645e3b67db3..82b964c478dc7 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -4018,7 +4018,9 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) { // process explicit bindings processExplicitBindingsOnDecl(VD); - if (VD->getType()->isHLSLResourceRecordArray()) { + // Add implicit binding attribute to non-static resource arrays. + if (VD->getType()->isHLSLResourceRecordArray() && + VD->getStorageClass() != SC_Static) { // If the resource array does not have an explicit binding attribute, // create an implicit one. It will be used to transfer implicit binding // order_ID to codegen. @@ -4212,8 +4214,8 @@ bool SemaHLSL::ActOnUninitializedVarDecl(VarDecl *VD) { if (VD->getType().getAddressSpace() == LangAS::hlsl_constant) return true; - // Initialize resources at the global scope - if (VD->hasGlobalStorage()) { + // Initialize non-static resources at the global scope. 
+ if (VD->hasGlobalStorage() && VD->getStorageClass() != SC_Static) { const Type *Ty = VD->getType().getTypePtr(); if (Ty->isHLSLResourceRecord()) return initGlobalResourceDecl(VD); @@ -4237,10 +4239,10 @@ bool SemaHLSL::CheckResourceBinOp(BinaryOperatorKind Opc, Expr *LHSExpr, while (auto *ASE = dyn_cast(E)) E = ASE->getBase()->IgnoreParenImpCasts(); - // Report error if LHS is a resource declared at a global scope. + // Report error if LHS is a non-static resource declared at a global scope. if (DeclRefExpr *DRE = dyn_cast(E->IgnoreParens())) { if (VarDecl *VD = dyn_cast(DRE->getDecl())) { - if (VD->hasGlobalStorage()) { + if (VD->hasGlobalStorage() && VD->getStorageClass() != SC_Static) { // assignment to global resource is not allowed SemaRef.Diag(Loc, diag::err_hlsl_assign_to_global_resource) << VD; SemaRef.Diag(VD->getLocation(), diag::note_var_declared_here) << VD; diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index f0f3832e160cd..1115efbb8305c 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/Sema/SemaOpenACC.h" +#include "clang/AST/ASTConsumer.h" #include "clang/AST/DeclOpenACC.h" #include "clang/AST/StmtOpenACC.h" #include "clang/Basic/DiagnosticSema.h" @@ -2457,7 +2458,8 @@ OpenACCRoutineDecl *SemaOpenACC::CheckRoutineDecl( ArrayRef Clauses, SourceLocation EndLoc) { assert(LParenLoc.isValid()); - if (FunctionDecl *FD = getFunctionFromRoutineName(FuncRef)) { + FunctionDecl *FD = nullptr; + if ((FD = getFunctionFromRoutineName(FuncRef))) { // OpenACC 3.3 2.15: // In C and C++, function static variables are not supported in functions to // which a routine directive applies. 
@@ -2509,11 +2511,9 @@ OpenACCRoutineDecl *SemaOpenACC::CheckRoutineDecl( {DirLoc, BindLoc}); FD->addAttr(RAA); // In case we are referencing not the 'latest' version, make sure we add - // the attribute to all declarations. - while (FD != FD->getMostRecentDecl()) { - FD = FD->getMostRecentDecl(); - FD->addAttr(RAA); - } + // the attribute to all declarations after the 'found' one. + for (auto *CurFD : FD->redecls()) + CurFD->addAttr(RAA->clone(getASTContext())); } LastRoutineDecl = OpenACCRoutineDecl::Create( @@ -2522,9 +2522,20 @@ OpenACCRoutineDecl *SemaOpenACC::CheckRoutineDecl( LastRoutineDecl->setAccess(AS_public); getCurContext()->addDecl(LastRoutineDecl); + if (FD) { + // Add this attribute to the list of annotations so that codegen can visit + // it later. FD doesn't necessarily exist, but that case should be + // diagnosed. + RoutineRefList.emplace_back(FD, LastRoutineDecl); + } return LastRoutineDecl; } +void SemaOpenACC::ActOnEndOfTranslationUnit(TranslationUnitDecl *TU) { + for (auto [FD, RoutineDecl] : RoutineRefList) + SemaRef.Consumer.HandleOpenACCRoutineReference(FD, RoutineDecl); +} + DeclGroupRef SemaOpenACC::ActOnEndRoutineDeclDirective( SourceLocation StartLoc, SourceLocation DirLoc, SourceLocation LParenLoc, Expr *ReferencedFunc, SourceLocation RParenLoc, diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c index 4863ba0bd8848..774e1452d10fa 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c @@ -465,3 +465,57 @@ __mmask64 test_kmov_q(__mmask64 A) { return __builtin_ia32_kmovq(A); } + +__mmask32 test_mm512_kunpackw(__mmask32 A, __mmask32 B) { + // CIR-LABEL: _mm512_kunpackw + // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int> + // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int> + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // 
CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int> -> !u32i + + // LLVM-LABEL: _mm512_kunpackw + // LLVM: [[A_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: [[B_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: [[A_HALF:%.*]] = shufflevector <32 x i1> [[A_VEC]], <32 x i1> [[A_VEC]], <16 x i32> + // LLVM: [[B_HALF:%.*]] = shufflevector <32 x i1> [[B_VEC]], <32 x i1> [[B_VEC]], <16 x i32> + // LLVM: [[RES:%.*]] = shufflevector <16 x i1> [[B_HALF]], <16 x i1> [[A_HALF]], <32 x i32> + // LLVM: bitcast <32 x i1> [[RES]] to i32 + + // OGCG-LABEL: _mm512_kunpackw + // OGCG: [[A_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: [[B_VEC:%.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: [[A_HALF:%.*]] = shufflevector <32 x i1> [[A_VEC]], <32 x i1> [[A_VEC]], <16 x i32> + // OGCG: [[B_HALF:%.*]] = shufflevector <32 x i1> [[B_VEC]], <32 x i1> [[B_VEC]], <16 x i32> + // OGCG: [[RES:%.*]] = shufflevector <16 x i1> [[B_HALF]], <16 x i1> [[A_HALF]], <32 x i32> + // OGCG: bitcast <32 x i1> [[RES]] to i32 + return _mm512_kunpackw(A, B); +} + +__mmask64 test_mm512_kunpackd(__mmask64 A, __mmask64 B) { + // CIR-LABEL: _mm512_kunpackd + // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int> + // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int> + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int> -> !u64i + + // LLVM-LABEL: _mm512_kunpackd + // LLVM: [[A_VEC:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // LLVM: [[B_VEC:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // LLVM: [[A_HALF:%.*]] = shufflevector <64 x i1> [[A_VEC]], <64 x i1> [[A_VEC]], <32 x i32> + // LLVM: [[B_HALF:%.*]] = shufflevector <64 x i1> [[B_VEC]], <64 x i1> [[B_VEC]], <32 x i32> + // LLVM: [[RES:%.*]] = shufflevector <32 x i1> [[B_HALF]], <32 x i1> [[A_HALF]], <64 x i32> + // LLVM: bitcast <64 x i1> [[RES]] to i64 + + // OGCG-LABEL: _mm512_kunpackd + // OGCG: [[A_VEC:%.*]] = 
bitcast i64 %{{.*}} to <64 x i1> + // OGCG: [[B_VEC:%.*]] = bitcast i64 %{{.*}} to <64 x i1> + // OGCG: [[A_HALF:%.*]] = shufflevector <64 x i1> [[A_VEC]], <64 x i1> [[A_VEC]], <32 x i32> + // OGCG: [[B_HALF:%.*]] = shufflevector <64 x i1> [[B_VEC]], <64 x i1> [[B_VEC]], <32 x i32> + // OGCG: [[RES:%.*]] = shufflevector <32 x i1> [[B_HALF]], <32 x i1> [[A_HALF]], <64 x i32> + // OGCG: bitcast <64 x i1> [[RES]] to i64 + return _mm512_kunpackd(A, B); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c index 9d957f5de554d..e03109510a931 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c @@ -228,6 +228,33 @@ __mmask16 test_kmov_w(__mmask16 A) { // OGCG: bitcast <16 x i1> {{.*}} to i16 return __builtin_ia32_kmovw(A); } + +__mmask16 test_mm512_kunpackb(__mmask16 A, __mmask16 B) { + // CIR-LABEL: _mm512_kunpackb + // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.vec.shuffle + // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int> -> !u16i + + // LLVM-LABEL: _mm512_kunpackb + // LLVM: [[A_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // LLVM: [[B_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // LLVM: [[A_HALF:%.*]] = shufflevector <16 x i1> [[A_VEC]], <16 x i1> [[A_VEC]], <8 x i32> + // LLVM: [[B_HALF:%.*]] = shufflevector <16 x i1> [[B_VEC]], <16 x i1> [[B_VEC]], <8 x i32> + // LLVM: [[RES:%.*]] = shufflevector <8 x i1> [[B_HALF]], <8 x i1> [[A_HALF]], <16 x i32> + // LLVM: bitcast <16 x i1> [[RES]] to i16 + + // OGCG-LABEL: _mm512_kunpackb + // OGCG: [[A_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // OGCG: [[B_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // OGCG: [[A_HALF:%.*]] = shufflevector <16 x i1> [[A_VEC]], <16 x i1> [[A_VEC]], <8 x i32> + // OGCG: 
[[B_HALF:%.*]] = shufflevector <16 x i1> [[B_VEC]], <16 x i1> [[B_VEC]], <8 x i32> + // OGCG: [[RES:%.*]] = shufflevector <8 x i1> [[B_HALF]], <8 x i1> [[A_HALF]], <16 x i32> + // OGCG: bitcast <16 x i1> [[RES]] to i16 + return _mm512_kunpackb(A, B); +} __m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) { // CIR-LABEL: test_mm512_i64gather_ps // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qps.512" diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-constant-p.c b/clang/test/CIR/CodeGenBuiltins/builtin-constant-p.c new file mode 100644 index 0000000000000..d684659216cba --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/builtin-constant-p.c @@ -0,0 +1,281 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +int a = 42; + +/* --- Compound literals */ + +struct foo { int x, y; }; + +int y; +struct foo f = (struct foo){ __builtin_constant_p(y), 42 }; + +// CIR: cir.global external @f = #cir.const_record<{#cir.int<0> : !s32i, #cir.int<42> : !s32i}> : !rec_foo +// LLVM: @f = global %struct.foo { i32 0, i32 42 } +// OGCG: @f = global %struct.foo { i32 0, i32 42 } + +struct foo test0(int expr) { + struct foo f = (struct foo){ __builtin_constant_p(expr), 42 }; + return f; +} + +// CIR: cir.func {{.*}} @test0(%[[ARG0:.*]]: !s32i {{.*}}) -> !rec_foo +// CIR: %[[EXPR_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["expr", init] +// CIR: cir.store %[[ARG0]], %[[EXPR_ADDR]] +// CIR: %[[EXPR:.*]] = cir.load{{.*}} %[[EXPR_ADDR]] +// CIR: %[[IS_CONSTANT:.*]] = cir.is_constant %[[EXPR]] : !s32i -> !cir.bool + +// LLVM: define{{.*}} %struct.foo @test0(i32 %[[ARG0:.*]]) +// LLVM: 
%[[EXPR_ADDR:.*]] = alloca i32 +// LLVM: store i32 %[[ARG0]], ptr %[[EXPR_ADDR]] +// LLVM: %[[EXPR:.*]] = load i32, ptr %[[EXPR_ADDR]] +// LLVM: %[[IS_CONSTANT:.*]] = call i1 @llvm.is.constant.i32(i32 %[[EXPR]]) + +// OGCG: define{{.*}} i64 @test0(i32 {{.*}} %[[ARG0:.*]]) +// OGCG: %[[EXPR_ADDR:.*]] = alloca i32 +// OGCG: store i32 %[[ARG0]], ptr %[[EXPR_ADDR]] +// OGCG: %[[EXPR:.*]] = load i32, ptr %[[EXPR_ADDR]] +// OGCG: %[[IS_CONSTANT:.*]] = call i1 @llvm.is.constant.i32(i32 %[[EXPR]]) + +/* --- Pointer types */ + +int test1(void) { + return __builtin_constant_p(&a - 13); +} + +// CIR: cir.func {{.*}} @test1() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test1() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test1() +// OGCG: ret i32 0 + +/* --- Aggregate types */ + +int b[] = {1, 2, 3}; + +int test2(void) { + return __builtin_constant_p(b); +} + +// CIR: cir.func {{.*}} @test2() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test2() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test2() +// OGCG: ret i32 0 + +const char test3_c[] = {1, 2, 3, 0}; + +int test3(void) { + return __builtin_constant_p(test3_c); +} + +// CIR: cir.func {{.*}} @test3() -> !s32i +// CIR: 
%[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test3() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test3() +// OGCG: ret i32 0 + +inline char test4_i(const char *x) { + return x[1]; +} + +int test4(void) { + return __builtin_constant_p(test4_i(test3_c)); +} + +// CIR: cir.func {{.*}} @test4() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test4() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test4() +// OGCG: ret i32 0 + +/* --- Constant global variables */ + +const int c = 42; + +int test5(void) { + return __builtin_constant_p(c); +} + +// CIR: cir.func {{.*}} @test5() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store %[[ONE]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test5() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 1, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test5() +// OGCG: ret i32 1 + +/* --- Array types */ + +int arr[] = { 1, 2, 3 }; + +int test6(void) { + return 
__builtin_constant_p(arr[2]); +} + +// CIR: cir.func {{.*}} @test6() -> !s32i +// CIR: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i +// CIR: %[[ARR:.*]] = cir.get_global @arr : !cir.ptr> +// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr> -> !cir.ptr +// CIR: %[[ELE_PTR:.*]] = cir.ptr_stride %[[ARR_PTR]], %[[TWO]] : (!cir.ptr, !s32i) -> !cir.ptr +// CIR: %[[ELE:.*]] = cir.load{{.*}} %[[ELE_PTR]] : !cir.ptr, !s32i +// CIR: %[[IS_CONSTANT:.*]] = cir.is_constant %[[ELE]] : !s32i -> !cir.bool + +// LLVM: define {{.*}} i32 @test6() +// LLVM: %[[TMP1:.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @arr, i64 8) +// LLVM: %[[TMP2:.*]] = call i1 @llvm.is.constant.i32(i32 %[[TMP1]]) + +// OGCG: define {{.*}} i32 @test6() +// OGCG: %[[TMP1:.*]] = load i32, ptr getelementptr inbounds ([3 x i32], ptr @arr, i64 0, i64 2) +// OGCG: %[[TMP2:.*]] = call i1 @llvm.is.constant.i32(i32 %[[TMP1]]) + +const int c_arr[] = { 1, 2, 3 }; + +int test7(void) { + return __builtin_constant_p(c_arr[2]); +} + +// CIR: cir.func {{.*}} @test7() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store %[[ONE]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test7() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 1, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test7() +// OGCG: ret i32 1 + +int test8(void) { + return __builtin_constant_p(c_arr); +} + +// CIR: cir.func {{.*}} @test8() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 
@test8() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test8() +// OGCG: ret i32 0 + +/* --- Function pointers */ + +int test9(void) { + return __builtin_constant_p(&test9); +} + +// CIR: cir.func {{.*}} @test9() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: cir.store %[[ZERO]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test9() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 0, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test9() +// OGCG: ret i32 0 + +int test10(void) { + return __builtin_constant_p(&test10 != 0); +} + +// CIR: cir.func {{.*}} @test10() -> !s32i +// CIR: %[[TMP1:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store %[[ONE]], %[[TMP1]] : !s32i, !cir.ptr +// CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP2]] : !s32i + +// LLVM: define{{.*}} i32 @test10() +// LLVM: %[[TMP1:.*]] = alloca i32 +// LLVM: store i32 1, ptr %[[TMP1]] +// LLVM: %[[TMP2:.*]] = load i32, ptr %[[TMP1]] +// LLVM: ret i32 %[[TMP2]] + +// OGCG: define{{.*}} i32 @test10() +// OGCG: ret i32 1 + +int test11_f(void); +void test11(void) { + int a, b; + (void)__builtin_constant_p((a = b, test11_f())); +} + +// CIR: cir.func {{.*}} @test11() +// CIR-NOT: call {{.*}}test11_f + +// LLVM: define{{.*}} void @test11() +// LLVM-NOT: call {{.*}}test11_f + +// OGCG: define{{.*}} void @test11() +// OGCG-NOT: call {{.*}}test11_f diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented-global.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented-global.cpp deleted 
file mode 100644 index a5e4694c6f5e6..0000000000000 --- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented-global.cpp +++ /dev/null @@ -1,6 +0,0 @@ -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-cir %s -o %t.cir -verify -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-llvm %s -o %t-cir.ll -verify - -void foo() {} -// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Global Routine Construct}} -#pragma acc routine(foo) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-anon-ns.cpp b/clang/test/CIR/CodeGenOpenACC/routine-anon-ns.cpp new file mode 100644 index 0000000000000..7c0a2edee5257 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-anon-ns.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +namespace { +#pragma acc routine seq + void NSFunc1(){} +#pragma acc routine seq + auto Lambda1 = [](){}; + + auto Lambda2 = [](){}; +} // namespace + +#pragma acc routine(NSFunc1) seq +#pragma acc routine(Lambda2) seq +void force_emit() { + NSFunc1(); + Lambda1(); + Lambda2(); +} + +// CHECK: cir.func{{.*}} @[[F1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]], @[[F1_R2_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[F1_R2_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp new file 
mode 100644 index 0000000000000..81437e7e02ab1 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq nohost +void Func1() {} + +void Func2() {} +#pragma acc routine(Func2) seq + +#pragma acc routine worker +void Func3() {} + +void Func4() {} +#pragma acc routine(Func4) worker nohost + +#pragma acc routine nohost vector +void Func5() {} + +void Func6() {} +#pragma acc routine(Func6) nohost vector + +// CHECK: cir.func{{.*}} @[[F1_NAME:.*Func1[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]]]>} +// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq nohost + +// CHECK: cir.func{{.*}} @[[F2_NAME:.*Func2[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F2_R_NAME:.*]]]>} + +// CHECK: cir.func{{.*}} @[[F3_NAME:.*Func3[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F3_R_NAME:.*]]]>} +// CHECK: acc.routine @[[F3_R_NAME]] func(@[[F3_NAME]]) worker + +// CHECK: cir.func{{.*}} @[[F4_NAME:.*Func4[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F4_R_NAME:.*]]]>} + +// CHECK: cir.func{{.*}} @[[F5_NAME:.*Func5[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F5_R_NAME:.*]]]>} +// CHECK: acc.routine @[[F5_R_NAME]] func(@[[F5_NAME]]) vector + +// CHECK: cir.func{{.*}} @[[F6_NAME:.*Func6[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F6_R_NAME:.*]]]>} + +// CHECK: acc.routine @[[F2_R_NAME]] func(@[[F2_NAME]]) seq +// CHECK: acc.routine @[[F4_R_NAME]] func(@[[F4_NAME]]) worker nohost +// CHECK: acc.routine @[[F6_R_NAME]] func(@[[F6_NAME]]) vector nohost diff --git a/clang/test/CIR/CodeGenOpenACC/routine-globals.cpp b/clang/test/CIR/CodeGenOpenACC/routine-globals.cpp new file mode 100644 index 0000000000000..5f125bbce6cb8 --- /dev/null 
+++ b/clang/test/CIR/CodeGenOpenACC/routine-globals.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq +auto Lambda1 = [](){}; + +auto Lambda2 = [](){}; +#pragma acc routine(Lambda2) seq +#pragma acc routine(Lambda2) seq + +#pragma acc routine seq +int GlobalFunc1(); + +int GlobalFunc2(); +#pragma acc routine(GlobalFunc2) seq +#pragma acc routine(GlobalFunc1) seq + +void force_emit() { + Lambda1(); + Lambda2(); + GlobalFunc1(); + GlobalFunc2(); +} + +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]], @[[L2_R2_NAME:.*]]]>} +// +// CHECK: cir.func{{.*}} @[[G1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G1_R_NAME:.*]], @[[G1_R2_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[G2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G2_R_NAME:.*]]]>} + +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[G1_R_NAME]] func(@[[G1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq +// CHECK: acc.routine @[[L2_R2_NAME]] func(@[[L2_NAME]]) seq +// CHECK: acc.routine @[[G2_R_NAME]] func(@[[G2_NAME]]) seq +// CHECK: acc.routine @[[G1_R2_NAME]] func(@[[G1_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-globals2.cpp b/clang/test/CIR/CodeGenOpenACC/routine-globals2.cpp new file mode 100644 index 0000000000000..e1aa5046684da --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-globals2.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq +void GlobalFunc4(); +#pragma acc routine(GlobalFunc4) seq + +#pragma 
acc routine seq +#pragma acc routine seq +void GlobalFunc5(); +#pragma acc routine(GlobalFunc5) seq +#pragma acc routine(GlobalFunc5) seq + +void GlobalFunc6(); +void GlobalFunc6(); +#pragma acc routine(GlobalFunc6) seq +void GlobalFunc6(){} + +void GlobalFunc7(){} +#pragma acc routine(GlobalFunc7) seq + +void force_emit() { + GlobalFunc4(); + GlobalFunc5(); + GlobalFunc6(); + GlobalFunc7(); +} + +// CHECK: cir.func{{.*}} @[[G6_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G6_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[G7_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G7_R_NAME:.*]]]>} + +// CHECK: cir.func{{.*}} @[[G4_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G4_R_NAME:.*]], @[[G4_R2_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[G5_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G5_R_NAME:.*]], @[[G5_R1_NAME:.*]], @[[G5_R2_NAME:.*]], @[[G5_R3_NAME:.*]]]>} + +// CHECK: acc.routine @[[G4_R_NAME]] func(@[[G4_NAME]]) seq +// CHECK: acc.routine @[[G5_R_NAME]] func(@[[G5_NAME]]) seq +// CHECK: acc.routine @[[G5_R1_NAME]] func(@[[G5_NAME]]) seq +// +// CHECK: acc.routine @[[G4_R2_NAME]] func(@[[G4_NAME]]) seq +// +// CHECK: acc.routine @[[G5_R2_NAME]] func(@[[G5_NAME]]) seq +// CHECK: acc.routine @[[G5_R3_NAME]] func(@[[G5_NAME]]) seq +// +// CHECK: acc.routine @[[G6_R_NAME]] func(@[[G6_NAME]]) seq +// CHECK: acc.routine @[[G7_R_NAME]] func(@[[G7_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-locals.cpp b/clang/test/CIR/CodeGenOpenACC/routine-locals.cpp new file mode 100644 index 0000000000000..d338a9cea0d09 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-locals.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +void GlobalFunc(); +void InFunc() { + +#pragma acc routine(GlobalFunc) seq + GlobalFunc(); + +#pragma acc routine 
seq + auto Lambda1 = [](){}; + Lambda1(); + + auto Lambda2 = [](){}; +#pragma acc routine(Lambda2) seq + Lambda2(); +}; + +// CHECK: cir.func{{.*}} @[[G1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[G1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} + +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[G1_R_NAME]] func(@[[G1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-members.cpp b/clang/test/CIR/CodeGenOpenACC/routine-members.cpp new file mode 100644 index 0000000000000..713500cfe3868 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-members.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +struct S { +#pragma acc routine seq + void MemFunc1(); + void MemFunc2(); +#pragma acc routine(S::MemFunc2) seq + void MemFunc3(); +#pragma acc routine(S::MemFunc3) seq + +#pragma acc routine seq + static void StaticMemFunc1(); + static void StaticMemFunc2(); + static void StaticMemFunc3(); +#pragma acc routine(StaticMemFunc3) seq + +#pragma acc routine seq + static constexpr auto StaticLambda1 = [](){}; + static constexpr auto StaticLambda2 = [](){}; +}; +#pragma acc routine(S::MemFunc2) seq +#pragma acc routine(S::StaticLambda2) seq +#pragma acc routine(S::StaticMemFunc2) seq + +void force_emit() { + S{}.MemFunc1(); + S{}.MemFunc2(); + S{}.MemFunc3(); + S::StaticMemFunc1(); + S::StaticMemFunc2(); + S::StaticMemFunc3(); + S::StaticLambda1(); + S::StaticLambda2(); +} + +// CHECK: cir.func{{.*}} @[[MEM1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = 
#acc.routine_info<[@[[MEM1_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[MEM2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[MEM2_R_NAME:.*]], @[[MEM2_R2_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[MEM3_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[MEM3_R_NAME:.*]]]>} +// +// CHECK: cir.func{{.*}} @[[STATICMEM1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[STATICMEM1_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[STATICMEM2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[STATICMEM2_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[STATICMEM3_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[STATICMEM3_R_NAME:.*]]]>} +// +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[MEM1_R_NAME]] func(@[[MEM1_NAME]]) seq +// CHECK: acc.routine @[[STATICMEM1_R_NAME]] func(@[[STATICMEM1_NAME]]) seq +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[MEM2_R_NAME]] func(@[[MEM2_NAME]]) seq +// CHECK: acc.routine @[[MEM3_R_NAME]] func(@[[MEM3_NAME]]) seq +// CHECK: acc.routine @[[STATICMEM3_R_NAME]] func(@[[STATICMEM3_NAME]]) seq +// CHECK: acc.routine @[[MEM2_R2_NAME]] func(@[[MEM2_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq +// CHECK: acc.routine @[[STATICMEM2_R_NAME]] func(@[[STATICMEM2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-ns.cpp b/clang/test/CIR/CodeGenOpenACC/routine-ns.cpp new file mode 100644 index 0000000000000..9d1d677e79db8 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-ns.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -fopenacc 
-Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +namespace NS1 { +#pragma acc routine seq + int NSFunc1(); +#pragma acc routine seq + auto Lambda1 = [](){}; + + auto Lambda2 = [](){}; +} // namespace NS1 + +#pragma acc routine(NS1::NSFunc1) seq +#pragma acc routine(NS1::Lambda2) seq + +void force_emit() { + NS1::NSFunc1(); + NS1::Lambda1(); + NS1::Lambda2(); +} + +// CHECK: cir.func{{.*}} @[[F1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]], @[[F1_R2_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L1_R_NAME:.*]]]>} +// CHECK: cir.func {{.*}}lambda{{.*}} @[[L2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[L2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L1_R_NAME]] func(@[[L1_NAME]]) seq +// CHECK: acc.routine @[[F1_R2_NAME]] func(@[[F1_NAME]]) seq +// CHECK: acc.routine @[[L2_R_NAME]] func(@[[L2_NAME]]) seq diff --git a/clang/test/CIR/CodeGenOpenACC/routine-templ.cpp b/clang/test/CIR/CodeGenOpenACC/routine-templ.cpp new file mode 100644 index 0000000000000..419442220a1ba --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/routine-templ.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s + +#pragma acc routine seq +template +void func(){} + +void use() { + func(); + func(); +} + +// CHECK: cir.func{{.*}} @[[T1_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[T1_R_NAME:.*]]]>} +// CHECK: cir.func{{.*}} @[[T2_NAME:[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[T2_R_NAME:.*]]]>} +// +// CHECK: acc.routine @[[T1_R_NAME]] func(@[[T1_NAME]]) seq +// CHECK: acc.routine @[[T2_R_NAME]] func(@[[T2_NAME]]) seq diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c index 613a390bc6d36..04834ece3a4a6 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c @@ -1,17 +1,26 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vmaxnmaq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: 
[[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) { @@ -22,12 +31,19 @@ float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// 
CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) { @@ -38,12 +54,19 @@ float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x 
half> [[TMP2]] // float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -54,12 +77,19 @@ float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -69,3 +99,5 @@ float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vmaxnmaq_m_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c index bad7cd903ab16..1225353a5a9d2 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vmaxnmq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], 
<8 x half> [[B:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) { @@ -20,10 +27,15 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { @@ -34,12 +46,19 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> 
[[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -50,12 +69,19 @@ float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: 
[[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +92,19 @@ float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// 
CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -82,12 +115,19 @@ float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_x_f32(float32x4_t a, 
float32x4_t b, mve_pred16_t p) { @@ -97,3 +137,5 @@ float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vmaxnmq_x_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c index 0182cf7c5b6b3..fc0dc5701e4d9 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c @@ -1,17 +1,26 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck 
%s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vminnmaq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) { @@ -22,12 +31,19 @@ float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: 
@test_vminnmaq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) { @@ -38,12 +54,19 @@ float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_m_f16( +// CHECK-STRICT-NEXT: entry: 
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -54,12 +77,19 @@ float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, 
mve_pred16_t p) { @@ -69,3 +99,5 @@ float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vminnmaq_m_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c index b48ff9d84b8f6..7dbad94c77674 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s 
--check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include -// CHECK-LABEL: @test_vminnmq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vminnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) { @@ -20,10 +27,15 @@ float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vminnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { @@ -34,12 +46,19 @@ float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 
[[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -50,12 +69,19 @@ float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// 
CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +92,19 @@ float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], 
<8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -82,12 +115,19 @@ float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = 
call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -97,3 +137,5 @@ float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vminnmq_x_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp b/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp index ed05962846aff..317efd1b3a138 100644 --- a/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp +++ b/clang/test/CodeGen/distributed-thin-lto/memprof-pgho.cpp @@ -1,6 +1,8 @@ // Test end-to-end ThinLTO optimization pipeline with PGHO, that it does not // interfere with other allocation instrumentation features. 
// +// REQUIRES: x86-registered-target +// // RUN: split-file %s %t // RUN: llvm-profdata merge %t/memprof.yaml -o %t/use.memprofdata // diff --git a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl index 80c0825895c86..57ea891b3eb29 100644 --- a/clang/test/Misc/amdgcn.languageOptsOpenCL.cl +++ b/clang/test/Misc/amdgcn.languageOptsOpenCL.cl @@ -162,6 +162,10 @@ #ifndef __opencl_c_program_scope_global_variables #error "Missing __opencl_c_program_scope_global_variables define" #endif + + #ifndef __opencl_c_read_write_images + #error "Missing __opencl_c_read_write_images define" + #endif #endif #if (__OPENCL_C_VERSION__ >= 300) diff --git a/clang/test/OpenMP/amdgcn_weak_alias.c b/clang/test/OpenMP/amdgcn_weak_alias.c index a9d5c1737b321..33c7dc0041810 100644 --- a/clang/test/OpenMP/amdgcn_weak_alias.c +++ b/clang/test/OpenMP/amdgcn_weak_alias.c @@ -94,10 +94,3 @@ int Three(void) __attribute__ ((weak, alias("__Three"))); int Three_(void) __attribute__ ((alias("__Three"))); extern int __attribute__((weak, alias("__Three_var"))) Three_var; extern int __attribute__((alias("__Three_var"))) Three_var_; -//. -// HOST: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0} -// HOST: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1} -//. -// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"__Two_var", i32 0, i32 0} -// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"__Three_var", i32 0, i32 1} -//. 
diff --git a/clang/test/SemaHLSL/static_resources.hlsl b/clang/test/SemaHLSL/static_resources.hlsl new file mode 100644 index 0000000000000..f71e9ea98e0d9 --- /dev/null +++ b/clang/test/SemaHLSL/static_resources.hlsl @@ -0,0 +1,138 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s + +// CHECK-DAG: [[ONE_STR:@.*]] = private unnamed_addr constant [4 x i8] c"One\00" +// CHECK-DAG: [[ARRAY_STR:@.*]] = private unnamed_addr constant [6 x i8] c"Array\00" +// CHECK-DAG: [[ONEWITHCOUNTER_STR:@.*]] = private unnamed_addr constant [15 x i8] c"OneWithCounter\00" +// CHECK-DAG: [[ARRAYWITHCOUNTER_STR:@.*]] = private unnamed_addr constant [17 x i8] c"ArrayWithCounter\00" +// CHECK-NOT: private unnamed_addr constant [{{[0-9]+}} x i8] c"Static + +RWBuffer One : register(u1, space5); +RWBuffer Array[2] : register(u10, space6); +RWStructuredBuffer OneWithCounter : register(u2, space4); +RWStructuredBuffer ArrayWithCounter[2] : register(u7, space4); + +// Check that the non-static resource One is initialized from binding on +// startup (register 1, space 5). +// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} @One, i32 noundef 1, i32 noundef 5, i32 noundef 1, i32 noundef 0, ptr noundef [[ONE_STR]]) + +// Check that the non-static resource OneWithCounter is initialized from binding on +// startup (register 2, space 4). 
+// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWStructuredBuffer::__createFromBindingWithImplicitCounter(unsigned int, unsigned int, int, unsigned int, char const*, unsigned int) +// CHECK-SAME: (ptr {{.*}} @OneWithCounter, i32 noundef 2, i32 noundef 4, i32 noundef 1, i32 noundef 0, ptr noundef [[ONEWITHCOUNTER_STR]], i32 noundef 0) + +// Note that non-static resource arrays are not initialized on startup. +// The individual resources from the array are initialized on access. + +static RWBuffer StaticOne; +static RWBuffer StaticArray[2]; + +// Check that StaticOne resource is initialized on startup with the default +// constructor and not from binding. It will initalize the handle to poison. +// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWBuffer::RWBuffer()(ptr {{.*}} @StaticOne) + +// Check that StaticArray elements are initialized on startup with the default +// constructor and not from binding. The initializer will loop over the array +// elements and call the default constructor for each one, setting the handle to poison. 
+// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: br label %arrayctor.loop +// CHECK: arrayctor.loop: ; preds = %arrayctor.loop, %entry +// CHECK-NEXT: %arrayctor.cur = phi ptr [ @StaticArray, %entry ], [ %arrayctor.next, %arrayctor.loop ] +// CHECK-NEXT: call void @hlsl::RWBuffer::RWBuffer()(ptr {{.*}} %arrayctor.cur) +// CHECK-NEXT: %arrayctor.next = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %arrayctor.cur, i32 1 +// CHECK-NEXT: %arrayctor.done = icmp eq ptr %arrayctor.next, getelementptr inbounds (%"class.hlsl::RWBuffer", ptr @StaticArray, i32 2) +// CHECK-NEXT: br i1 %arrayctor.done, label %arrayctor.cont, label %arrayctor.loop +// CHECK: arrayctor.cont: ; preds = %arrayctor.loop +// CHECK-NEXT: ret void + +static RWStructuredBuffer StaticOneWithCounter; + +// Check that StaticOneWithCounter resource is initialized on startup with the default +// constructor and not from binding. It will initalize the handle to poison. +// CHECK: define internal void @__cxx_global_var_init{{.*}} +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @hlsl::RWStructuredBuffer::RWStructuredBuffer()(ptr {{.*}} @StaticOneWithCounter) + +// No other global initialization routines should be present. +// CHECK-NOT: define internal void @__cxx_global_var_init{{.*}} + +[numthreads(4,1,1)] +void main() { +// CHECK: define internal void @main()() +// CHECK-NEXT: entry: +// CHECK-NEXT: %[[TMP0:.*]] = alloca %"class.hlsl::RWBuffer" + + static RWBuffer StaticLocal; +// Check that StaticLocal is initialized by default constructor (handle set to poison) +// and not from binding. +// call void @hlsl::RWBuffer::RWBuffer()(ptr {{.*}} @main()::StaticLocal) + + StaticLocal = Array[1]; +// A[2][0] is accessed here, so it should be initialized from binding (register 10, space 6, index 1), +// and then assigned to StaticLocal using = operator. 
+// CHECK: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} %[[TMP0]], i32 noundef 10, i32 noundef 6, i32 noundef 2, i32 noundef 1, ptr noundef [[ARRAY_STR]]) +// CHECK-NEXT: call {{.*}} ptr @hlsl::RWBuffer::operator=({{.*}})(ptr {{.*}} @main()::StaticLocal, ptr {{.*}} %[[TMP0]]) + + StaticOne = One; +// Operator = call to assign non-static One handle to static StaticOne. +// CHECK-NEXT: call {{.*}} ptr @hlsl::RWBuffer::operator=({{.*}})(ptr {{.*}} @StaticOne, ptr {{.*}} @One) + + StaticArray = Array; +// Check that each elements of StaticArray is initialized from binding (register 10, space 6, indices 0 and 1). +// CHECK: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer") align 4 @StaticArray, i32 noundef 10, i32 noundef 6, i32 noundef 2, i32 noundef 0, ptr noundef [[ARRAY_STR]]) +// CHECK-NEXT: call void @hlsl::RWBuffer::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer") align 4 getelementptr ([2 x %"class.hlsl::RWBuffer"], ptr @StaticArray, i32 0, i32 1), +// CHECK-SAME: i32 noundef 10, i32 noundef 6, i32 noundef 2, i32 noundef 1, ptr noundef [[ARRAY_STR]] + + StaticArray[1] = One; +// Operator = call to assign non-static One handle to StaticArray element. 
+// CHECK-NEXT: call {{.*}} ptr @hlsl::RWBuffer::operator=(hlsl::RWBuffer const&) +// CHECK-SAME: (ptr {{.*}} getelementptr inbounds ([2 x %"class.hlsl::RWBuffer"], ptr @StaticArray, i32 0, i32 1), ptr {{.*}} @One) + + StaticLocal[0] = 123; +// CHECK-NEXT: %[[PTR0:.*]] = call {{.*}} ptr @hlsl::RWBuffer::operator[](unsigned int)(ptr {{.*}} @main()::StaticLocal, i32 noundef 0) +// CHECK-NEXT: store float 1.230000e+02, ptr %[[PTR0]] + + StaticOne[1] = 456; +// CHECK-NEXT: %[[PTR1:.*]] = call {{.*}} ptr @hlsl::RWBuffer::operator[](unsigned int)(ptr {{.*}}) @StaticOne, i32 noundef 1) +// CHECK-NEXT: store float 4.560000e+02, ptr %[[PTR1]], align 4 + + StaticArray[1][2] = 789; +// CHECK-NEXT: %[[PTR2:.*]] = call {{.*}} ptr @hlsl::RWBuffer::operator[](unsigned int) +// CHECK-SAME: (ptr {{.*}} getelementptr inbounds ([2 x %"class.hlsl::RWBuffer"], ptr @StaticArray, i32 0, i32 1), i32 noundef 2) +// CHECK-NEXT: store float 7.890000e+02, ptr %[[PTR2]], align 4 + + static RWStructuredBuffer StaticLocalWithCounter; +// Check that StaticLocalWithCounter is initialized by default constructor (handle set to poison) +// and not from binding. +// call void @hlsl::RWStructuredBuffer::RWStructuredBuffer()(ptr {{.*}} @main()::StaticLocalWithCounter) + + static RWStructuredBuffer StaticLocalArrayWithCounter[2]; + + StaticLocalWithCounter = OneWithCounter; +// Operator = call to assign non-static OneWithCounter handles to StaticLocalWithCounter handles. +// CHECK: call {{.*}} ptr @hlsl::RWStructuredBuffer::operator=(hlsl::RWStructuredBuffer const&)(ptr {{.*}} @main()::StaticLocalWithCounter, ptr {{.*}} @OneWithCounter) + + StaticLocalArrayWithCounter = ArrayWithCounter; +// Check that each elements of StaticLocalArrayWithCounter is initialized from binding +// of ArrayWithCounter (register 7, space 4, indices 0 and 1). 
+// CHECK: call void @hlsl::RWStructuredBuffer::__createFromBindingWithImplicitCounter(unsigned int, unsigned int, int, unsigned int, char const*, unsigned int) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWStructuredBuffer") align 4 @main()::StaticLocalArrayWithCounter, +// CHECK-SAME: i32 noundef 7, i32 noundef 4, i32 noundef 2, i32 noundef 0, ptr noundef [[ARRAYWITHCOUNTER_STR]], i32 noundef 1) + +// CHECK-NEXT: call void @hlsl::RWStructuredBuffer::__createFromBindingWithImplicitCounter(unsigned int, unsigned int, int, unsigned int, char const*, unsigned int) +// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWStructuredBuffer") align 4 getelementptr ([2 x %"class.hlsl::RWStructuredBuffer"], ptr @main()::StaticLocalArrayWithCounter, i32 0, i32 1), +// CHECK-SAME: i32 noundef 7, i32 noundef 4, i32 noundef 2, i32 noundef 1, ptr noundef [[ARRAYWITHCOUNTER_STR]], i32 noundef 1) +} + +// No other binding initialization calls should be present. +// CHECK-NOT: call void @hlsl::RWBuffer::__createFrom{{.*}}Binding{{.*}} diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 5f5bf42df5e6b..3efa28b0469c1 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -284,11 +284,9 @@ class ResourceDirectoryCache { if (CachedResourceDir != Cache.end()) return CachedResourceDir->second; - std::vector PrintResourceDirArgs{ClangBinaryName}; - if (ClangCLMode) - PrintResourceDirArgs.push_back("/clang:-print-resource-dir"); - else - PrintResourceDirArgs.push_back("-print-resource-dir"); + const std::array PrintResourceDirArgs{ + ClangBinaryName, + ClangCLMode ? 
"/clang:-print-resource-dir" : "-print-resource-dir"}; llvm::SmallString<64> OutputFile, ErrorFile; llvm::sys::fs::createTemporaryFile("print-resource-dir-output", diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h index 92d2210cbc640..8c848fcab24ee 100644 --- a/flang-rt/include/flang-rt/runtime/descriptor.h +++ b/flang-rt/include/flang-rt/runtime/descriptor.h @@ -512,9 +512,7 @@ class Descriptor { RT_API_ATTRS void Check() const; - // When dumpRawType, dumps stringified CFI_type_*, otherwise - // try to canonicalize and print as a Fortran type. - void Dump(FILE * = stdout, bool dumpRawType = true) const; + void Dump(FILE * = stdout) const; RT_API_ATTRS inline bool HasAddendum() const { return raw_.extra & _CFI_ADDENDUM_FLAG; diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp index 8c547107a47de..c95da0a5371e5 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -292,168 +292,14 @@ RT_API_ATTRS void Descriptor::Check() const { // TODO } -static const char *GetTypeStr(ISO::CFI_type_t type, bool dumpRawType) { - if (dumpRawType) { -#define CASE(x) \ - case (x): \ - return #x; - switch (type) { - CASE(CFI_type_signed_char) - CASE(CFI_type_short) - CASE(CFI_type_int) - CASE(CFI_type_long) - CASE(CFI_type_long_long) - CASE(CFI_type_size_t) - CASE(CFI_type_int8_t) - CASE(CFI_type_int16_t) - CASE(CFI_type_int32_t) - CASE(CFI_type_int64_t) - CASE(CFI_type_int128_t) - CASE(CFI_type_int_least8_t) - CASE(CFI_type_int_least16_t) - CASE(CFI_type_int_least32_t) - CASE(CFI_type_int_least64_t) - CASE(CFI_type_int_least128_t) - CASE(CFI_type_int_fast8_t) - CASE(CFI_type_int_fast16_t) - CASE(CFI_type_int_fast32_t) - CASE(CFI_type_int_fast64_t) - CASE(CFI_type_int_fast128_t) - CASE(CFI_type_intmax_t) - CASE(CFI_type_intptr_t) - CASE(CFI_type_ptrdiff_t) - CASE(CFI_type_half_float) - CASE(CFI_type_bfloat) - CASE(CFI_type_float) - CASE(CFI_type_double) - 
CASE(CFI_type_extended_double) - CASE(CFI_type_long_double) - CASE(CFI_type_float128) - CASE(CFI_type_half_float_Complex) - CASE(CFI_type_bfloat_Complex) - CASE(CFI_type_float_Complex) - CASE(CFI_type_double_Complex) - CASE(CFI_type_extended_double_Complex) - CASE(CFI_type_long_double_Complex) - CASE(CFI_type_float128_Complex) - CASE(CFI_type_Bool) - CASE(CFI_type_char) - CASE(CFI_type_cptr) - CASE(CFI_type_struct) - CASE(CFI_type_char16_t) - CASE(CFI_type_char32_t) - CASE(CFI_type_uint8_t) - CASE(CFI_type_uint16_t) - CASE(CFI_type_uint32_t) - CASE(CFI_type_uint64_t) - CASE(CFI_type_uint128_t) - } -#undef CASE - return nullptr; - } - TypeCode code{type}; - - if (!code.IsValid()) - return "invalid"; - - common::optional> categoryAndKind = - code.GetCategoryAndKind(); - if (!categoryAndKind) - return nullptr; - - TypeCategory tcat; - int kind; - std::tie(tcat, kind) = *categoryAndKind; - -#define CASE(cat, k) \ - case (k): \ - return #cat "(kind=" #k ")"; - switch (tcat) { - case TypeCategory::Integer: - switch (kind) { - CASE(INTEGER, 1) - CASE(INTEGER, 2) - CASE(INTEGER, 4) - CASE(INTEGER, 8) - CASE(INTEGER, 16) - } - break; - case TypeCategory::Unsigned: - switch (kind) { - CASE(UNSIGNED, 1) - CASE(UNSIGNED, 2) - CASE(UNSIGNED, 4) - CASE(UNSIGNED, 8) - CASE(UNSIGNED, 16) - } - break; - case TypeCategory::Real: - switch (kind) { - CASE(REAL, 2) - CASE(REAL, 3) - CASE(REAL, 4) - CASE(REAL, 8) - CASE(REAL, 10) - CASE(REAL, 16) - } - break; - case TypeCategory::Complex: - switch (kind) { - CASE(COMPLEX, 2) - CASE(COMPLEX, 3) - CASE(COMPLEX, 4) - CASE(COMPLEX, 8) - CASE(COMPLEX, 10) - CASE(COMPLEX, 16) - } - break; - case TypeCategory::Character: - switch (kind) { - CASE(CHARACTER, 1) - CASE(CHARACTER, 2) - CASE(CHARACTER, 4) - } - break; - case TypeCategory::Logical: - switch (kind) { - CASE(LOGICAL, 1) - CASE(LOGICAL, 2) - CASE(LOGICAL, 4) - CASE(LOGICAL, 8) - } - break; - case TypeCategory::Derived: - return "DERIVED"; - } -#undef CASE - return nullptr; -} - -void 
Descriptor::Dump(FILE *f, bool dumpRawType) const { +void Descriptor::Dump(FILE *f) const { std::fprintf(f, "Descriptor @ %p:\n", reinterpret_cast(this)); std::fprintf(f, " base_addr %p\n", raw_.base_addr); - std::fprintf(f, " elem_len %zd\n", ElementBytes()); + std::fprintf(f, " elem_len %zd\n", static_cast(raw_.elem_len)); std::fprintf(f, " version %d\n", static_cast(raw_.version)); - if (rank() > 0) { - std::fprintf(f, " rank %d\n", rank()); - } else { - std::fprintf(f, " scalar\n"); - } - int ty = static_cast(raw_.type); - if (const char *tyStr = GetTypeStr(raw_.type, dumpRawType)) { - std::fprintf(f, " type %d \"%s\"\n", ty, tyStr); - } else { - std::fprintf(f, " type %d\n", ty); - } - int attr = static_cast(raw_.attribute); - if (IsPointer()) { - std::fprintf(f, " attribute %d (pointer) \n", attr); - } else if (IsAllocatable()) { - std::fprintf(f, " attribute %d (allocatable)\n", attr); - } else { - std::fprintf(f, " attribute %d\n", attr); - } - + std::fprintf(f, " rank %d\n", static_cast(raw_.rank)); + std::fprintf(f, " type %d\n", static_cast(raw_.type)); + std::fprintf(f, " attribute %d\n", static_cast(raw_.attribute)); std::fprintf(f, " extra %d\n", static_cast(raw_.extra)); std::fprintf(f, " addendum %d\n", static_cast(HasAddendum())); std::fprintf(f, " alloc_idx %d\n", static_cast(GetAllocIdx())); diff --git a/flang-rt/lib/runtime/extensions.cpp b/flang-rt/lib/runtime/extensions.cpp index d39e429eb88b9..c110b0381890c 100644 --- a/flang-rt/lib/runtime/extensions.cpp +++ b/flang-rt/lib/runtime/extensions.cpp @@ -416,14 +416,6 @@ std::int64_t RTNAME(time)() { return time(nullptr); } // MCLOCK: returns accumulated CPU time in ticks std::int32_t FORTRAN_PROCEDURE_NAME(mclock)() { return std::clock(); } -void RTNAME(ShowDescriptor)(const Fortran::runtime::Descriptor *descr) { - if (descr) { - descr->Dump(stderr, /*dumpRawType=*/false); - } else { - std::fprintf(stderr, "NULL\n"); - } -} - static void _internal_srand(int seed) { rand_seed = seed ? 
seed : 123459876; } // IRAND(I) diff --git a/flang-rt/unittests/Runtime/Descriptor.cpp b/flang-rt/unittests/Runtime/Descriptor.cpp index f86ff4cf16a55..4a7bb43a492af 100644 --- a/flang-rt/unittests/Runtime/Descriptor.cpp +++ b/flang-rt/unittests/Runtime/Descriptor.cpp @@ -9,7 +9,6 @@ #include "flang-rt/runtime/descriptor.h" #include "tools.h" #include "gtest/gtest.h" -#include using namespace Fortran::runtime; @@ -159,115 +158,3 @@ TEST(Descriptor, FixedStride) { EXPECT_TRUE(descriptor.IsContiguous()); EXPECT_EQ(descriptor.FixedStride().value_or(-666), 0); } - -static std::string getAddrFilteredContent(FILE *fin) { - rewind(fin); - std::ostringstream content; - char buffer[1024]; - size_t bytes_read; - while ((bytes_read = fread(buffer, 1, sizeof(buffer), fin)) > 0) { - content.write(buffer, bytes_read); - } - return std::regex_replace( - content.str(), std::regex("(0x[0-9a-fA-F]*)"), "[address]"); -} - -TEST(Descriptor, Dump) { - StaticDescriptor<4> staticDesc[2]; - Descriptor &descriptor{staticDesc[0].descriptor()}; - using Type = std::int32_t; - Type data[8][8][8]; - constexpr int four{static_cast(sizeof data[0][0][0])}; - TypeCode integer{TypeCategory::Integer, four}; - // Scalar - descriptor.Establish(integer, four, data, 0); - FILE *tmpf = tmpfile(); - ASSERT_TRUE(tmpf) << "tmpfile returned NULL"; - auto resetTmpFile = [tmpf]() { - rewind(tmpf); - ftruncate(fileno(tmpf), 0); - }; - - descriptor.Dump(tmpf, /*dumpRawType=*/false); - // also dump as CFI type - descriptor.Dump(tmpf, /*dumpRawType=*/true); - std::string output = getAddrFilteredContent(tmpf); - ASSERT_STREQ(output.c_str(), - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " scalar\n" - " type 9 \"INTEGER(kind=4)\"\n" - " attribute 0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n" - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " scalar\n" - " type 9 \"CFI_type_int32_t\"\n" - " attribute 
0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n"); - - // Contiguous matrix (0:7, 0:7) - SubscriptValue extent[3]{8, 8, 8}; - descriptor.Establish(integer, four, data, 2, extent); - resetTmpFile(); - descriptor.Dump(tmpf, /*dumpRawType=*/false); - output = getAddrFilteredContent(tmpf); - ASSERT_STREQ(output.c_str(), - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " rank 2\n" - " type 9 \"INTEGER(kind=4)\"\n" - " attribute 0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n" - " dim[0] lower_bound 0\n" - " extent 8\n" - " sm 4\n" - " dim[1] lower_bound 0\n" - " extent 8\n" - " sm 32\n"); - - TypeCode real{TypeCategory::Real, four}; - // Discontiguous real 3-D array (0:7, 0:6:2, 0:6:2) - descriptor.Establish(real, four, data, 3, extent); - descriptor.GetDimension(1).SetExtent(4); - descriptor.GetDimension(1).SetByteStride(8 * 2 * four); - descriptor.GetDimension(2).SetExtent(4); - descriptor.GetDimension(2).SetByteStride(8 * 8 * 2 * four); - - resetTmpFile(); - descriptor.Dump(tmpf, /*dumpRawType=*/false); - output = getAddrFilteredContent(tmpf); - ASSERT_STREQ(output.c_str(), - "Descriptor @ [address]:\n" - " base_addr [address]\n" - " elem_len 4\n" - " version 20240719\n" - " rank 3\n" - " type 27 \"REAL(kind=4)\"\n" - " attribute 0\n" - " extra 0\n" - " addendum 0\n" - " alloc_idx 0\n" - " dim[0] lower_bound 0\n" - " extent 8\n" - " sm 4\n" - " dim[1] lower_bound 0\n" - " extent 4\n" - " sm 64\n" - " dim[2] lower_bound 0\n" - " extent 4\n" - " sm 512\n"); - fclose(tmpf); -} diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index ecc9143af69fd..31bead9f8bfdc 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -1414,48 +1414,6 @@ This is prefixed by `STRING`, a colon and a space. 
- **Class:** subroutine - **Syntax:** `CALL PERROR(STRING)` -### Non-Standard Intrinsics: SHOW_DESCRIPTOR - -#### Description -`SHOW_DESCRIPTOR(VAR)` prints (on the C stderr stream) a contents of a descriptor for the variable VAR, -which can be of any type and rank, including scalars. -Requires use of flang_debug module. - -Here is an example of its output: -``` -Descriptor @ 0x7ffe506fc368: - base_addr 0x55944caef0f0 - elem_len 4 - version 20240719 - rank 1 - type 9 "INTEGER(kind=4)" - attribute 2 (allocatable) - extra 0 - addendum 0 - alloc_idx 0 - dim[0] lower_bound 1 - extent 5 - sm 4 -``` - -#### Usage and Info -- **Standard:** flang extension -- **Class:** subroutine -- **Syntax:** `CALL show_descriptor(VAR)` - -#### Example -```Fortran -subroutine test - use flang_debug - implicit none - character(len=9) :: c = 'Hey buddy' - integer :: a(5) - call show_descriptor(c) - call show_descriptor(c(1:3)) - call show_descriptor(a) -end subroutine test -``` - ### Non-Standard Intrinsics: SRAND #### Description diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index b248106b51101..0ae9177f98fd8 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -413,7 +413,6 @@ struct IntrinsicLibrary { template mlir::Value genShift(mlir::Type resultType, llvm::ArrayRef); mlir::Value genShiftA(mlir::Type resultType, llvm::ArrayRef); - void genShowDescriptor(llvm::ArrayRef); mlir::Value genSign(mlir::Type, llvm::ArrayRef); mlir::Value genSind(mlir::Type, llvm::ArrayRef); mlir::Value genSinpi(mlir::Type, llvm::ArrayRef); diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h index 2f52ffce5eb43..30c3189366cec 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h @@ -111,9 +111,6 
@@ void genSleep(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value genChdir(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value name); -/// generate dump of a descriptor -void genShowDescriptor(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value descriptor); mlir::Value genIrand(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value i); diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 07bb47e26b968..3fda523acb382 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -350,15 +350,14 @@ def cuf_SharedMemoryOp let arguments = (ins TypeAttr:$in_type, OptionalAttr:$uniq_name, OptionalAttr:$bindc_name, Variadic:$typeparams, Variadic:$shape, - Optional:$offset // offset in bytes from the shared memory - // base address. - ); + // offset in bytes from the shared memory base address. + Optional:$offset, OptionalAttr:$alignment); let results = (outs fir_ReferenceType:$ptr); let assemblyFormat = [{ (`[` $offset^ `:` type($offset) `]`)? $in_type (`(` $typeparams^ `:` type($typeparams) `)`)? - (`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr)) + (`,` $shape^ `:` type($shape) )? (`align` $alignment^ )? 
attr-dict `->` qualified(type($ptr)) }]; let builders = [OpBuilder<(ins "mlir::Type":$inType, diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 3167c554abbdd..0f133623475f8 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -43,6 +43,15 @@ struct OpenACCPointerLikeModel mlir::TypedValue destination, mlir::TypedValue source, mlir::Type varType) const; + + mlir::Value genLoad(mlir::Type pointer, mlir::OpBuilder &builder, + mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + + bool genStore(mlir::Type pointer, mlir::OpBuilder &builder, + mlir::Location loc, mlir::Value valueToStore, + mlir::TypedValue destPtr) const; }; template diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h index f3ed15b86f1be..f2765a5987ea1 100644 --- a/flang/include/flang/Runtime/extensions.h +++ b/flang/include/flang/Runtime/extensions.h @@ -25,11 +25,6 @@ typedef std::uint32_t gid_t; #else #include "sys/types.h" //pid_t #endif -namespace Fortran { -namespace runtime { -class Descriptor; -} -} // namespace Fortran extern "C" { @@ -107,9 +102,6 @@ int FORTRAN_PROCEDURE_NAME(mclock)(); float FORTRAN_PROCEDURE_NAME(secnds)(float *refTime); float RTNAME(Secnds)(float *refTime, const char *sourceFile, int line); -// Extension subroutine SHOW_DESCRIPTOR(D) -void RTNAME(ShowDescriptor)(const Fortran::runtime::Descriptor *descr); - // GNU extension function IRAND(I) int RTNAME(Irand)(int *i); diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index d69400e0ec188..bbcb766274e7f 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1713,8 +1713,6 @@ static const IntrinsicInterface intrinsicSubroutine[]{ {}, Rank::scalar, 
IntrinsicClass::impureSubroutine}, {"second", {{"time", DefaultReal, Rank::scalar}}, {}, Rank::scalar, IntrinsicClass::impureSubroutine}, - {"__builtin_show_descriptor", {{"d", AnyData, Rank::anyOrAssumedRank}}, {}, - Rank::elemental, IntrinsicClass::impureSubroutine}, {"system", {{"command", DefaultChar, Rank::scalar}, {"exitstat", DefaultInt, Rank::scalar, Optionality::optional, diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp index 270037f5fcb00..ae6120826f8d2 100644 --- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp @@ -17,6 +17,8 @@ #include "flang/Evaluate/common.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/MutableBox.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "mlir/Dialect/Index/IR/IndexOps.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" @@ -1489,6 +1491,13 @@ void CUDAIntrinsicLibrary::genTMABulkG2S( builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {}); } +static void setAlignment(mlir::Value ptr, unsigned alignment) { + if (auto declareOp = mlir::dyn_cast(ptr.getDefiningOp())) + if (auto sharedOp = mlir::dyn_cast( + declareOp.getMemref().getDefiningOp())) + sharedOp.setAlignment(alignment); +} + static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value barrier, mlir::Value src, mlir::Value dst, mlir::Value nelem, @@ -1496,8 +1505,11 @@ static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize); auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); barrier = builder.createConvert(loc, llvmPtrTy, barrier); - dst = builder.createConvert(loc, llvmPtrTy, dst); - src = builder.createConvert(loc, llvmPtrTy, src); + setAlignment(dst, 16); + dst = 
convertPtrToNVVMSpace(builder, loc, dst, + mlir::NVVM::NVVMMemorySpace::Shared); + src = convertPtrToNVVMSpace(builder, loc, src, + mlir::NVVM::NVVMMemorySpace::Shared); mlir::NVVM::InlinePtxOp::create( builder, loc, mlir::TypeRange{}, {dst, src, size, barrier}, {}, "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], " diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 75a74eeb18417..3619e5bb942db 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -727,10 +727,6 @@ static constexpr IntrinsicHandler handlers[]{ {"shifta", &I::genShiftA}, {"shiftl", &I::genShift}, {"shiftr", &I::genShift}, - {"show_descriptor", - &I::genShowDescriptor, - {{{"d", asBox}}}, - /*isElemental=*/false}, {"sign", &I::genSign}, {"signal", &I::genSignalSubroutine, @@ -7888,16 +7884,6 @@ mlir::Value IntrinsicLibrary::genShiftA(mlir::Type resultType, return result; } -void IntrinsicLibrary::genShowDescriptor( - llvm::ArrayRef args) { - assert(args.size() == 1 && "expected single argument for show_descriptor"); - const mlir::Value descriptor = fir::getBase(args[0]); - - assert(fir::isa_box_type(descriptor.getType()) && - "argument must have been lowered to box type"); - fir::runtime::genShowDescriptor(builder, loc, descriptor); -} - // SIGNAL void IntrinsicLibrary::genSignalSubroutine( llvm::ArrayRef args) { diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp index caeed0c0f6bfe..4d366135c305f 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp @@ -471,13 +471,6 @@ mlir::Value fir::runtime::genChdir(fir::FirOpBuilder &builder, return fir::CallOp::create(builder, loc, func, args).getResult(0); } -void fir::runtime::genShowDescriptor(fir::FirOpBuilder &builder, - mlir::Location loc, mlir::Value descAddr) { - 
mlir::func::FuncOp func{ - fir::runtime::getRuntimeFunc(loc, builder)}; - fir::CallOp::create(builder, loc, func, descAddr); -} - mlir::Value fir::runtime::genIrand(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value i) { auto runtimeFunc = fir::runtime::getRuntimeFunc(loc, builder); diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index 687007d957225..671e5f9455c22 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -333,7 +333,7 @@ void cuf::SharedMemoryOp::build( bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName); build(builder, result, wrapAllocaResultType(inType), mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape, - /*offset=*/mlir::Value{}); + /*offset=*/mlir::Value{}, /*alignment=*/mlir::IntegerAttr{}); result.addAttributes(attributes); } diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index ae0f5fb8197fa..9fcc7d3681c39 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -1014,4 +1014,114 @@ template bool OpenACCPointerLikeModel::genCopy( mlir::TypedValue source, mlir::Type varType) const; +template +mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const { + + // Unwrap to get the pointee type. + mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer); + assert(pointeeTy && "expected pointee type to be extractable"); + + // Box types contain both a descriptor and referenced data. The genLoad API + // handles simple loads and cannot properly manage both parts. + if (fir::isa_box_type(pointeeTy)) + return {}; + + // Unlimited polymorphic (class(*)) cannot be handled because type is unknown. 
+ if (fir::isUnlimitedPolymorphicType(pointeeTy)) + return {}; + + // Return empty for dynamic size types because the load logic + // cannot be determined simply from the type. + if (fir::hasDynamicSize(pointeeTy)) + return {}; + + mlir::Value loadedValue = fir::LoadOp::create(builder, loc, srcPtr); + + // If valueType is provided and differs from the loaded type, insert a convert + if (valueType && loadedValue.getType() != valueType) + return fir::ConvertOp::create(builder, loc, valueType, loadedValue); + + return loadedValue; +} + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template mlir::Value OpenACCPointerLikeModel::genLoad( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::TypedValue srcPtr, + mlir::Type valueType) const; + +template +bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const { + + // Unwrap to get the pointee type. + mlir::Type pointeeTy = fir::dyn_cast_ptrEleTy(pointer); + assert(pointeeTy && "expected pointee type to be extractable"); + + // Box types contain both a descriptor and referenced data. The genStore API + // handles simple stores and cannot properly manage both parts. + if (fir::isa_box_type(pointeeTy)) + return false; + + // Unlimited polymorphic (class(*)) cannot be handled because type is unknown. 
+ if (fir::isUnlimitedPolymorphicType(pointeeTy)) + return false; + + // Return false for dynamic size types because the store logic + // cannot be determined simply from the type. + if (fir::hasDynamicSize(pointeeTy)) + return false; + + // Get the type from the value being stored + mlir::Type valueType = valueToStore.getType(); + mlir::Value convertedValue = valueToStore; + + // If the value type differs from the pointee type, insert a convert + if (valueType != pointeeTy) + convertedValue = + fir::ConvertOp::create(builder, loc, pointeeTy, valueToStore); + + fir::StoreOp::create(builder, loc, convertedValue, destPtr); + return true; +} + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + +template bool OpenACCPointerLikeModel::genStore( + mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value valueToStore, + mlir::TypedValue destPtr) const; + } // namespace fir::acc diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90 index a9b60508785db..4d134fa4b62b1 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang/module/__fortran_builtins.f90 @@ -28,9 +28,6 @@ intrinsic :: __builtin_c_f_pointer public :: __builtin_c_f_pointer - intrinsic :: __builtin_show_descriptor - public :: __builtin_show_descriptor - intrinsic :: sizeof ! 
extension public :: sizeof diff --git a/flang/module/flang_debug.f90 b/flang/module/flang_debug.f90 deleted file mode 100644 index baab3b2477f49..0000000000000 --- a/flang/module/flang_debug.f90 +++ /dev/null @@ -1,14 +0,0 @@ -!===-- module/flang_debug.f90 ----------------------------------------------===! -! -! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -! See https://llvm.org/LICENSE.txt for license information. -! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -! -!===------------------------------------------------------------------------===! - -module flang_debug - - use __fortran_builtins, only: & - show_descriptor => __builtin_show_descriptor - -end module flang_debug diff --git a/flang/test/Fir/OpenACC/pointer-like-interface-load.mlir b/flang/test/Fir/OpenACC/pointer-like-interface-load.mlir new file mode 100644 index 0000000000000..170ea56b24742 --- /dev/null +++ b/flang/test/Fir/OpenACC/pointer-like-interface-load.mlir @@ -0,0 +1,95 @@ +// RUN: fir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=load}))" 2>&1 | FileCheck %s + +func.func @test_load_scalar_f32() { + %ptr = fir.alloca f32 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca f32 {test.ptr} + // CHECK: Loaded value type: f32 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_load_scalar_i32() { + %ptr = fir.alloca i32 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca i32 {test.ptr} + // CHECK: Loaded value type: i32 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_load_scalar_i64() { + %ptr = fir.alloca i64 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca i64 {test.ptr} + // CHECK: Loaded value type: i64 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref + return +} 
+ +// ----- + +func.func @test_load_heap_scalar() { + %ptr = fir.allocmem f64 {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.allocmem f64 {test.ptr} + // CHECK: Loaded value type: f64 + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.heap + return +} + +// ----- + +func.func @test_load_logical() { + %ptr = fir.alloca !fir.logical<4> {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca !fir.logical<4> {test.ptr} + // CHECK: Loaded value type: !fir.logical<4> + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_load_derived_type() { + %ptr = fir.alloca !fir.type<_QTt{i:i32}> {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca !fir.type<_QTt{i:i32}> {test.ptr} + // CHECK: Loaded value type: !fir.type<_QTt{i:i32}> + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_load_constant_array() { + %ptr = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Successfully generated load for operation: %{{.*}} = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Loaded value type: !fir.array<10xf32> + // CHECK: Generated: %{{.*}} = fir.load %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_load_dynamic_array_fails() { + %c10 = arith.constant 10 : index + %ptr = fir.alloca !fir.array, %c10 {test.ptr} + // CHECK: Failed to generate load for operation: %{{.*}} = fir.alloca !fir.array + return +} + +// ----- + +func.func @test_load_box_fails() { + %ptr = fir.alloca !fir.box> {test.ptr} + // CHECK: Failed to generate load for operation: %{{.*}} = fir.alloca !fir.box> + return +} + +// ----- + +func.func @test_load_unlimited_polymorphic_fails() { + %ptr = fir.alloca !fir.class {test.ptr} + // CHECK: Failed to generate load for operation: %{{.*}} = fir.alloca !fir.class + return +} + diff --git a/flang/test/Fir/OpenACC/pointer-like-interface-store.mlir 
b/flang/test/Fir/OpenACC/pointer-like-interface-store.mlir new file mode 100644 index 0000000000000..5ea4f0e750c65 --- /dev/null +++ b/flang/test/Fir/OpenACC/pointer-like-interface-store.mlir @@ -0,0 +1,85 @@ +// RUN: fir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=store}))" 2>&1 | FileCheck %s + +func.func @test_store_scalar_f32() { + %ptr = fir.alloca f32 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca f32 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 4.200000e+01 : f32 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_store_scalar_i32() { + %ptr = fir.alloca i32 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca i32 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i32 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_store_scalar_i64() { + %ptr = fir.alloca i64 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca i64 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i64 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func @test_store_heap_scalar() { + %ptr = fir.allocmem f64 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.allocmem f64 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 4.200000e+01 : f64 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.heap + return +} + +// ----- + +func.func @test_store_with_type_conversion() { + %ptr = fir.alloca i32 {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca i32 {test.ptr} + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i32 + // CHECK: Generated: fir.store %[[VAL]] to %{{.*}} : !fir.ref + return +} + +// ----- + +func.func 
@test_store_constant_array() { + %val = fir.undefined !fir.array<10xf32> {test.value} + %ptr = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Successfully generated store for operation: %{{.*}} = fir.alloca !fir.array<10xf32> {test.ptr} + // CHECK: Generated: fir.store %{{.*}} to %{{.*}} : !fir.ref> + return +} + +// ----- + +func.func @test_store_dynamic_array_fails() { + %c10 = arith.constant 10 : index + %ptr = fir.alloca !fir.array, %c10 {test.ptr} + // CHECK: Failed to generate store for operation: %{{.*}} = fir.alloca !fir.array + return +} + +// ----- + +func.func @test_store_box_fails() { + %ptr = fir.alloca !fir.box> {test.ptr} + // CHECK: Failed to generate store for operation: %{{.*}} = fir.alloca !fir.box> + return +} + +// ----- + +func.func @test_store_unlimited_polymorphic_fails() { + %ptr = fir.alloca !fir.class {test.ptr} + // CHECK: Failed to generate store for operation: %{{.*}} = fir.alloca !fir.class + return +} + diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 434322ea22265..1e3c66307c334 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -538,11 +538,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c4 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c4Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c4Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xcomplex> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_c4Etmp"} -> !fir.ref>> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! 
CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_c8(a, n) @@ -557,11 +558,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c8 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c8Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_c8Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xcomplex> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_c8Etmp"} -> !fir.ref>> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 16 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! 
CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_i4(a, n) @@ -576,11 +578,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i4 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i4Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i4Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xi32> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_i4Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_i8(a, n) @@ -595,11 +598,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i8 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i8Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_i8Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xi64> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_i8Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r2(a, n) @@ -614,11 +618,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r2 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r2Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r2Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xf16> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_r2Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 2 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! 
CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r4(a, n) @@ -633,11 +638,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r4 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r4Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r4Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xf32> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_r4Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! 
CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_load_r8(a, n) @@ -652,11 +658,12 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r8 ! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r8Ebarrier1"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFtest_tma_bulk_load_r8Eelem_count"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.shared_memory !fir.array<1024xf64> align 16 {bindc_name = "tmp", uniq_name = "_QFtest_tma_bulk_load_r8Etmp"} -> !fir.ref> ! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref ! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 ! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 ! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref) -> !llvm.ptr -! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr<3>, !llvm.ptr<3>, i32, !llvm.ptr) ! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) attributes(global) subroutine test_tma_bulk_store_c4(c, n) diff --git a/flang/test/Lower/Intrinsics/show_descriptor.f90 b/flang/test/Lower/Intrinsics/show_descriptor.f90 deleted file mode 100644 index a0b8d3eb4348f..0000000000000 --- a/flang/test/Lower/Intrinsics/show_descriptor.f90 +++ /dev/null @@ -1,241 +0,0 @@ -! RUN: bbc -emit-fir %s -o - | FileCheck %s - -module test_show_descriptor -use flang_debug -contains -subroutine test_int -! 
CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_int() { - implicit none - integer :: n - integer,allocatable :: a(:) - n = 5 - allocate(a(n)) -! CHECK: %[[C3:.*]] = arith.constant 3 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[C5:.*]] = arith.constant 5 : i32 -! CHECK: %[[C0:.*]] = arith.constant 0 : index -! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ALLOCA_0:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QMtest_show_descriptorFtest_intEa"} -! CHECK: %[[ZERO_BITS_0:.*]] = fir.zero_bits !fir.heap> -! CHECK: %[[SHAPE_0:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1> -! CHECK: %[[EMBOX_0:.*]] = fir.embox %[[ZERO_BITS_0]](%[[SHAPE_0]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> -! CHECK: fir.store %[[EMBOX_0]] to %[[ALLOCA_0]] : !fir.ref>>> -! CHECK: %[[DECLARE_0:.*]] = fir.declare %[[ALLOCA_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_intEa"} : (!fir.ref>>>) -> !fir.ref>>> -! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QMtest_show_descriptorFtest_intEn"} -! CHECK: %[[DECLARE_1:.*]] = fir.declare %[[ALLOCA_1]] {uniq_name = "_QMtest_show_descriptorFtest_intEn"} : (!fir.ref) -> !fir.ref -! CHECK: fir.store %[[C5]] to %[[DECLARE_1]] : !fir.ref -! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_1]] : !fir.ref -! CHECK: %[[CONVERT_0:.*]] = fir.convert %[[LOAD_0]] : (i32) -> index -! CHECK: %[[CMPI_0:.*]] = arith.cmpi sgt, %[[CONVERT_0]], %[[C0]] : index -! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[CONVERT_0]], %[[C0]] : index -! CHECK: %[[ALLOCMEM_0:.*]] = fir.allocmem !fir.array, %[[SELECT_0]] {fir.must_be_heap = true, uniq_name = "_QMtest_show_descriptorFtest_intEa.alloc"} - - call show_descriptor(a) -! CHECK: %[[SHAPE_1:.*]] = fir.shape %[[SELECT_0]] : (index) -> !fir.shape<1> -! CHECK: %[[EMBOX_1:.*]] = fir.embox %[[ALLOCMEM_0]](%[[SHAPE_1]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> -! 
CHECK: fir.store %[[EMBOX_1]] to %[[DECLARE_0]] : !fir.ref>>> -! CHECK: %[[LOAD_1:.*]] = fir.load %[[DECLARE_0]] : !fir.ref>>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[LOAD_1]]) fastmath : (!fir.box>>) -> () - - call show_descriptor(a(1:3)) -! CHECK: %[[LOAD_2:.*]] = fir.load %[[DECLARE_0]] : !fir.ref>>> -! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C3]] : (index) -> !fir.shape<1> -! CHECK: %[[BOX_ADDR_0:.*]] = fir.box_addr %[[LOAD_2]] : (!fir.box>>) -> !fir.heap> -! CHECK: %[[CONSTANT_4:.*]] = arith.constant 0 : index -! CHECK: %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[LOAD_2]], %[[CONSTANT_4]] : (!fir.box>>, index) -> (index, index, index) -! CHECK: %[[SHAPE_SHIFT_0:.*]] = fir.shape_shift %[[BOX_DIMS_0]]#0, %[[BOX_DIMS_0]]#1 : (index, index) -> !fir.shapeshift<1> -! CHECK: %[[ARRAY_COOR_0:.*]] = fir.array_coor %[[BOX_ADDR_0]](%[[SHAPE_SHIFT_0]]) %[[C1]] : (!fir.heap>, !fir.shapeshift<1>, index) -> !fir.ref -! CHECK: %[[CONVERT_1:.*]] = fir.convert %[[ARRAY_COOR_0]] : (!fir.ref) -> !fir.ref> -! CHECK: %[[EMBOX_2:.*]] = fir.embox %[[CONVERT_1]](%[[SHAPE_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_2]]) fastmath : (!fir.box>) -> () - deallocate(a) -end subroutine test_int - -subroutine test_char -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_char() { - implicit none - character(len=9) :: c = 'Hey buddy' - call show_descriptor(c) -! CHECK: %[[C3:.*]] = arith.constant 3 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[C9:.*]] = arith.constant 9 : index -! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_0:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_charEc) : !fir.ref> -! CHECK: %[[DECLARE_0:.*]] = fir.declare %[[ADDRESS_OF_0]] typeparams %[[C9]] {uniq_name = "_QMtest_show_descriptorFtest_charEc"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[EMBOX_0:.*]] = fir.embox %[[DECLARE_0]] : (!fir.ref>) -> !fir.box> -! 
CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_0]]) fastmath : (!fir.box>) -> () - - call show_descriptor(c(1:3)) -! CHECK: %[[C1_0:.*]] = arith.constant 1 : index -! CHECK: %[[SUBI_0:.*]] = arith.subi %[[C1]], %[[C1_0]] : index -! CHECK: %[[CONVERT_0:.*]] = fir.convert %[[DECLARE_0]] : (!fir.ref>) -> !fir.ref>> -! CHECK: %[[COORDINATE_OF_0:.*]] = fir.coordinate_of %[[CONVERT_0]], %[[SUBI_0]] : (!fir.ref>>, index) -> !fir.ref> -! CHECK: %[[CONVERT_1:.*]] = fir.convert %[[COORDINATE_OF_0]] : (!fir.ref>) -> !fir.ref> -! CHECK: %[[EMBOX_1:.*]] = fir.embox %[[CONVERT_1]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_1]]) fastmath : (!fir.box>) -> () -! CHECK: return -end subroutine test_char - -subroutine test_logical -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_logical() { - implicit none - logical(kind=1) :: l1 = .false. - logical(kind=2) :: l2 = .true. - logical(kind=2), dimension(2), target :: la2 = (/ .true., .false. /) - logical(kind=2), dimension(:), pointer :: pla2 -! CHECK: %[[C0:.*]] = arith.constant 0 : index -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_0:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_logicalEl1) : !fir.ref> -! CHECK: %[[DECLARE_0:.*]] = fir.declare %[[ADDRESS_OF_0]] {uniq_name = "_QMtest_show_descriptorFtest_logicalEl1"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_1:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_logicalEl2) : !fir.ref> -! CHECK: %[[DECLARE_1:.*]] = fir.declare %[[ADDRESS_OF_1]] {uniq_name = "_QMtest_show_descriptorFtest_logicalEl2"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_2:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_logicalEla2) : !fir.ref>> -! CHECK: %[[SHAPE_0:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> -! 
CHECK: %[[DECLARE_2:.*]] = fir.declare %[[ADDRESS_OF_2]](%[[SHAPE_0]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_logicalEla2"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> -! CHECK: %[[ALLOCA_0:.*]] = fir.alloca !fir.box>>> {bindc_name = "pla2", uniq_name = "_QMtest_show_descriptorFtest_logicalEpla2"} -! CHECK: %[[ZERO_BITS_0:.*]] = fir.zero_bits !fir.ptr>> -! CHECK: %[[SHAPE_1:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1> -! CHECK: %[[EMBOX_0:.*]] = fir.embox %[[ZERO_BITS_0]](%[[SHAPE_1]]) : (!fir.ptr>>, !fir.shape<1>) -> !fir.box>>> -! CHECK: fir.store %[[EMBOX_0]] to %[[ALLOCA_0]] : !fir.ref>>>> - - call show_descriptor(l1) - call show_descriptor(l2) - pla2 => la2 -! CHECK: %[[DECLARE_3:.*]] = fir.declare %[[ALLOCA_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_logicalEpla2"} : (!fir.ref>>>>) -> !fir.ref>>>> -! CHECK: %[[EMBOX_1:.*]] = fir.embox %[[DECLARE_0]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_1]]) fastmath : (!fir.box>) -> () -! CHECK: %[[EMBOX_2:.*]] = fir.embox %[[DECLARE_1]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_2]]) fastmath : (!fir.box>) -> () - - call show_descriptor(la2) - call show_descriptor(pla2) -! CHECK: %[[CONVERT_0:.*]] = fir.convert %[[DECLARE_2]] : (!fir.ref>>) -> !fir.ref>> -! CHECK: %[[EMBOX_3:.*]] = fir.embox %[[CONVERT_0]](%[[SHAPE_0]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>>> -! CHECK: fir.store %[[EMBOX_3]] to %[[DECLARE_3]] : !fir.ref>>>> -! CHECK: %[[EMBOX_4:.*]] = fir.embox %[[DECLARE_2]](%[[SHAPE_0]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_4]]) fastmath : (!fir.box>>) -> () -! CHECK: %[[LOAD_0:.*]] = fir.load %[[DECLARE_3]] : !fir.ref>>>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[LOAD_0]]) fastmath : (!fir.box>>>) -> () -! CHECK: return -end subroutine test_logical - -subroutine test_real -! 
CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_real() { - implicit none - real :: half = 0.5 - real :: row(3) = (/ 1 , 2, 3 /) - real(kind=8) :: w(4) = (/ .00011_8 , .00012_8, .00013_8, .00014_8 /) -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[C4:.*]] = arith.constant 4 : index -! CHECK: %[[C3:.*]] = arith.constant 3 : index -! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_4:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_realEhalf) : !fir.ref -! CHECK: %[[DECLARE_5:.*]] = fir.declare %[[ADDRESS_OF_4]] {uniq_name = "_QMtest_show_descriptorFtest_realEhalf"} : (!fir.ref) -> !fir.ref -! CHECK: %[[ADDRESS_OF_5:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_realErow) : !fir.ref> -! CHECK: %[[SHAPE_2:.*]] = fir.shape %[[C3]] : (index) -> !fir.shape<1> -! CHECK: %[[DECLARE_6:.*]] = fir.declare %[[ADDRESS_OF_5]](%[[SHAPE_2]]) {uniq_name = "_QMtest_show_descriptorFtest_realErow"} : (!fir.ref>, !fir.shape<1>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_6:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_realEw) : !fir.ref> -! CHECK: %[[SHAPE_3:.*]] = fir.shape %[[C4]] : (index) -> !fir.shape<1> -! CHECK: %[[DECLARE_7:.*]] = fir.declare %[[ADDRESS_OF_6]](%[[SHAPE_3]]) {uniq_name = "_QMtest_show_descriptorFtest_realEw"} : (!fir.ref>, !fir.shape<1>) -> !fir.ref> - - call show_descriptor(half) - call show_descriptor(row) - call show_descriptor(w) - call show_descriptor(w(1:4:2)) -! CHECK: %[[EMBOX_7:.*]] = fir.embox %[[DECLARE_5]] : (!fir.ref) -> !fir.box -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_7]]) fastmath : (!fir.box) -> () -! CHECK: %[[EMBOX_8:.*]] = fir.embox %[[DECLARE_6]](%[[SHAPE_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_8]]) fastmath : (!fir.box>) -> () -! CHECK: %[[EMBOX_9:.*]] = fir.embox %[[DECLARE_7]](%[[SHAPE_3]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! 
CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_9]]) fastmath : (!fir.box>) -> () -! CHECK: %[[SHAPE_4:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> -! CHECK: %[[UNDEFINED_0:.*]] = fir.undefined index -! CHECK: %[[SLICE_0:.*]] = fir.slice %[[C1]], %[[C4]], %[[C2]] : (index, index, index) -> !fir.slice<1> -! CHECK: %[[EMBOX_10:.*]] = fir.embox %[[DECLARE_7]](%[[SHAPE_3]]) {{\[}}%[[SLICE_0]]] : (!fir.ref>, !fir.shape<1>, !fir.slice<1>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_10]]) fastmath : (!fir.box>) -> () -! CHECK: return -end subroutine test_real - -subroutine test_complex -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_complex() { - implicit none - complex, parameter :: hr = 0.5 - complex, parameter :: hi = (0, 0.5) - complex :: c1 = hr - complex :: c2 = hi - complex :: a2(2) = (/ hr, hi /) -! CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 -! CHECK: %[[CST_1:.*]] = arith.constant 5.000000e-01 : f32 -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[ALLOCA_1:.*]] = fir.alloca complex -! CHECK: %[[ALLOCA_2:.*]] = fir.alloca complex -! CHECK: %[[DUMMY_SCOPE_3:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_7:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEa2) : !fir.ref>> -! CHECK: %[[SHAPE_5:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> -! CHECK: %[[DECLARE_8:.*]] = fir.declare %[[ADDRESS_OF_7]](%[[SHAPE_5]]) {uniq_name = "_QMtest_show_descriptorFtest_complexEa2"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> -! CHECK: %[[ADDRESS_OF_8:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEc1) : !fir.ref> -! CHECK: %[[DECLARE_9:.*]] = fir.declare %[[ADDRESS_OF_8]] {uniq_name = "_QMtest_show_descriptorFtest_complexEc1"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_9:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEc2) : !fir.ref> -! 
CHECK: %[[DECLARE_10:.*]] = fir.declare %[[ADDRESS_OF_9]] {uniq_name = "_QMtest_show_descriptorFtest_complexEc2"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_10:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEChi) : !fir.ref> -! CHECK: %[[DECLARE_11:.*]] = fir.declare %[[ADDRESS_OF_10]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_complexEChi"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_11:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_complexEChr) : !fir.ref> -! CHECK: %[[DECLARE_12:.*]] = fir.declare %[[ADDRESS_OF_11]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_complexEChr"} : (!fir.ref>) -> !fir.ref> -! CHECK: %[[UNDEFINED_1:.*]] = fir.undefined complex -! CHECK: %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_1]], %[[CST_1]], [0 : index] : (complex, f32) -> complex -! CHECK: %[[INSERT_VALUE_1:.*]] = fir.insert_value %[[INSERT_VALUE_0]], %[[CST_0]], [1 : index] : (complex, f32) -> complex -! CHECK: fir.store %[[INSERT_VALUE_1]] to %[[ALLOCA_2]] : !fir.ref> - - call show_descriptor(hr) -! CHECK: %[[EMBOX_11:.*]] = fir.embox %[[ALLOCA_2]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_11]]) fastmath : (!fir.box>) -> () - - call show_descriptor(hi) -! CHECK: %[[INSERT_VALUE_2:.*]] = fir.insert_value %[[UNDEFINED_1]], %[[CST_0]], [0 : index] : (complex, f32) -> complex -! CHECK: %[[INSERT_VALUE_3:.*]] = fir.insert_value %[[INSERT_VALUE_2]], %[[CST_1]], [1 : index] : (complex, f32) -> complex -! CHECK: fir.store %[[INSERT_VALUE_3]] to %[[ALLOCA_1]] : !fir.ref> -! CHECK: %[[EMBOX_12:.*]] = fir.embox %[[ALLOCA_1]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_12]]) fastmath : (!fir.box>) -> () - - call show_descriptor(a2) -! CHECK: %[[EMBOX_13:.*]] = fir.embox %[[DECLARE_8]](%[[SHAPE_5]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> -! 
CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_13]]) fastmath : (!fir.box>>) -> () -! CHECK: return -end subroutine test_complex - -subroutine test_derived -! CHECK-LABEL: func.func @_QMtest_show_descriptorPtest_derived() { - implicit none - type :: t1 - integer :: a - integer :: b - end type t1 - type, extends (t1) :: t2 - integer :: c - end type t2 - type(t2) :: vt2 = t2(7,5,3) -! CHECK: %[[C0:.*]] = arith.constant 0 : index -! CHECK: %[[C2:.*]] = arith.constant 2 : index -! CHECK: %[[C1:.*]] = arith.constant 1 : index -! CHECK: %[[DUMMY_SCOPE_4:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[ADDRESS_OF_12:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.a) : !fir.ref> -! CHECK: %[[DECLARE_13:.*]] = fir.declare %[[ADDRESS_OF_12]] typeparams %[[C1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.a"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_13:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.b) : !fir.ref> -! CHECK: %[[DECLARE_14:.*]] = fir.declare %[[ADDRESS_OF_13]] typeparams %[[C1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.b"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_14:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.t1) : !fir.ref> -! CHECK: %[[DECLARE_15:.*]] = fir.declare %[[ADDRESS_OF_14]] typeparams %[[C2]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.t1"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_15:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.c) : !fir.ref> -! CHECK: %[[DECLARE_16:.*]] = fir.declare %[[ADDRESS_OF_15]] typeparams %[[C1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.c"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_16:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedE.n.t2) : !fir.ref> -! 
CHECK: %[[DECLARE_17:.*]] = fir.declare %[[ADDRESS_OF_16]] typeparams %[[C2]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMtest_show_descriptorFtest_derivedE.n.t2"} : (!fir.ref>, index) -> !fir.ref> -! CHECK: %[[ADDRESS_OF_17:.*]] = fir.address_of(@_QMtest_show_descriptorFtest_derivedEvt2) : !fir.ref,c:i32}>> -! CHECK: %[[DECLARE_18:.*]] = fir.declare %[[ADDRESS_OF_17]] {uniq_name = "_QMtest_show_descriptorFtest_derivedEvt2"} : (!fir.ref,c:i32}>>) -> !fir.ref,c:i32}>> - - call show_descriptor(vt2) -! CHECK: %[[EMBOX_16:.*]] = fir.embox %[[DECLARE_18]] : (!fir.ref,c:i32}>>) -> !fir.box,c:i32}>> -! CHECK: fir.call @_FortranAShowDescriptor(%[[EMBOX_16]]) fastmath : (!fir.box,c:i32}>>) -> () -! CHECK: return -end subroutine test_derived -end module test_show_descriptor diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 3baffbd0d310d..a2b4d73f48384 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -31,7 +31,6 @@ set(MODULES_WITHOUT_IMPLEMENTATION "iso_fortran_env" "f90deviceio" "iso_fortran_env_impl" - "flang_debug" ) set(MODULES ${MODULES_WITH_IMPLEMENTATION} ${MODULES_WITHOUT_IMPLEMENTATION}) diff --git a/libclc/opencl/lib/generic/atomic/atomic_def.inc b/libclc/opencl/lib/generic/atomic/atomic_def.inc index a4ccab5990888..e6b7c831e10d3 100644 --- a/libclc/opencl/lib/generic/atomic/atomic_def.inc +++ b/libclc/opencl/lib/generic/atomic/atomic_def.inc @@ -12,7 +12,8 @@ defined(cl_khr_int64_extended_atomics)) #define __CLC_HAVE_64_ATOMIC #endif -#if defined(__CLC_FPSIZE) && (__CLC_FPSIZE < 64 || defined(__CLC_HAVE_64_ATOMIC) +#if defined(__CLC_FPSIZE) && \ + (__CLC_FPSIZE < 64 || defined(__CLC_HAVE_64_ATOMIC)) #define __CLC_HAVE_FP_ATOMIC #endif #if defined(__CLC_GENSIZE) && \ diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 01ca7498f0cc1..09c981131ff96 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -570,10 +570,7 @@ hash_map<_Key, _Tp, _Hash, 
_Pred, _Alloc>::hash_map( } template -hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(const hash_map& __u) : __table_(__u.__table_) { - __table_.__rehash_unique(__u.bucket_count()); - insert(__u.begin(), __u.end()); -} +hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(const hash_map& __u) : __table_(__u.__table_) {} template typename hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::__node_holder diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 2796774fee24a..56aa4d8a47eeb 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -356,10 +356,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set( } template -hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(const hash_set& __u) : __table_(__u.__table_) { - __table_.__rehash_unique(__u.bucket_count()); - insert(__u.begin(), __u.end()); -} +hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(const hash_set& __u) : __table_(__u.__table_) {} template template diff --git a/libcxx/test/extensions/gnu/hash_map/copy.pass.cpp b/libcxx/test/extensions/gnu/hash_map/copy.pass.cpp new file mode 100644 index 0000000000000..65b8debda0676 --- /dev/null +++ b/libcxx/test/extensions/gnu/hash_map/copy.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated + +// hash_map::hash_map(const hash_map&) + +#include +#include + +int main(int, char**) { + __gnu_cxx::hash_map map; + + map.insert(std::make_pair(1, 1)); + map.insert(std::make_pair(2, 1)); + + auto map2 = map; + + assert(map2.size() == 2); + + return 0; +} diff --git a/libcxx/test/extensions/gnu/hash_set/copy.pass.cpp b/libcxx/test/extensions/gnu/hash_set/copy.pass.cpp new file mode 100644 index 0000000000000..95a3579194923 --- /dev/null +++ b/libcxx/test/extensions/gnu/hash_set/copy.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated + +// hash_set::hash_set(const hash_set&) + +#include +#include + +int main(int, char**) { + __gnu_cxx::hash_set set; + + set.insert(1); + set.insert(2); + + auto set2 = set; + + assert(set2.size() == 2); + + return 0; +} diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index fb9e5df648c75..eb299099a9b71 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -154,7 +154,7 @@ def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">, // Auxiliary options related to balanced partition defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions", - "When --irpgo-profile is pecified, prioritize function similarity for compression in addition to startup time", "">; + "When --irpgo-profile is specified, prioritize function similarity for compression in addition to startup time", "">; def 
verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">, HelpText<"Print information on balanced partitioning">; diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index ef501fbafc947..0ba152166522b 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -425,6 +425,18 @@ void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBBreakpoint(PyObject * return sb_ptr; } +void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBThread(PyObject * data) { + lldb::SBThread *sb_ptr = nullptr; + + int valid_cast = + SWIG_ConvertPtr(data, (void **)&sb_ptr, SWIGTYPE_p_lldb__SBThread, 0); + + if (valid_cast == -1) + return NULL; + + return sb_ptr; +} + void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBFrame(PyObject * data) { lldb::SBFrame *sb_ptr = nullptr; diff --git a/lldb/examples/python/templates/scripted_frame_provider.py b/lldb/examples/python/templates/scripted_frame_provider.py index 20f4d76d188c2..7a72f1a24c9da 100644 --- a/lldb/examples/python/templates/scripted_frame_provider.py +++ b/lldb/examples/python/templates/scripted_frame_provider.py @@ -31,7 +31,54 @@ class ScriptedFrameProvider(metaclass=ABCMeta): ) """ + @staticmethod + def applies_to_thread(thread): + """Determine if this frame provider should be used for a given thread. + + This static method is called before creating an instance of the frame + provider to determine if it should be applied to a specific thread. + Override this method to provide custom filtering logic. + + Args: + thread (lldb.SBThread): The thread to check. + + Returns: + bool: True if this frame provider should be used for the thread, + False otherwise. The default implementation returns True for + all threads. + + Example: + + .. 
code-block:: python + + @staticmethod + def applies_to_thread(thread): + # Only apply to thread 1 + return thread.GetIndexID() == 1 + """ + return True + + @staticmethod @abstractmethod + def get_description(): + """Get a description of this frame provider. + + This method should return a human-readable string describing what + this frame provider does. The description is used for debugging + and display purposes. + + Returns: + str: A description of the frame provider. + + Example: + + .. code-block:: python + + def get_description(self): + return "Crash log frame provider for thread 1" + """ + pass + def __init__(self, input_frames, args): """Construct a scripted frame provider. diff --git a/lldb/examples/python/templates/scripted_process.py b/lldb/examples/python/templates/scripted_process.py index b4232f632a30a..24aa9818bb989 100644 --- a/lldb/examples/python/templates/scripted_process.py +++ b/lldb/examples/python/templates/scripted_process.py @@ -243,6 +243,7 @@ def __init__(self, process, args): key/value pairs used by the scripted thread. 
""" self.target = None + self.arch = None self.originating_process = None self.process = None self.args = None @@ -264,6 +265,9 @@ def __init__(self, process, args): and process.IsValid() ): self.target = process.target + triple = self.target.triple + if triple: + self.arch = triple.split("-")[0] self.originating_process = process self.process = self.target.GetProcess() self.get_register_info() @@ -350,17 +354,14 @@ def get_stackframes(self): def get_register_info(self): if self.register_info is None: self.register_info = dict() - if "x86_64" in self.originating_process.arch: + if "x86_64" in self.arch: self.register_info["sets"] = ["General Purpose Registers"] self.register_info["registers"] = INTEL64_GPR - elif ( - "arm64" in self.originating_process.arch - or self.originating_process.arch == "aarch64" - ): + elif "arm64" in self.arch or self.arch == "aarch64": self.register_info["sets"] = ["General Purpose Registers"] self.register_info["registers"] = ARM64_GPR else: - raise ValueError("Unknown architecture", self.originating_process.arch) + raise ValueError("Unknown architecture", self.arch) return self.register_info @abstractmethod @@ -403,11 +404,12 @@ def __init__(self, thread, args): """Construct a scripted frame. Args: - thread (ScriptedThread): The thread owning this frame. + thread (ScriptedThread/lldb.SBThread): The thread owning this frame. args (lldb.SBStructuredData): A Dictionary holding arbitrary key/value pairs used by the scripted frame. 
""" self.target = None + self.arch = None self.originating_thread = None self.thread = None self.args = None @@ -417,15 +419,17 @@ def __init__(self, thread, args): self.register_ctx = {} self.variables = [] - if ( - isinstance(thread, ScriptedThread) - or isinstance(thread, lldb.SBThread) - and thread.IsValid() + if isinstance(thread, ScriptedThread) or ( + isinstance(thread, lldb.SBThread) and thread.IsValid() ): - self.target = thread.target self.process = thread.process + self.target = self.process.target + triple = self.target.triple + if triple: + self.arch = triple.split("-")[0] + tid = thread.tid if isinstance(thread, ScriptedThread) else thread.id self.originating_thread = thread - self.thread = self.process.GetThreadByIndexID(thread.tid) + self.thread = self.process.GetThreadByIndexID(tid) self.get_register_info() @abstractmethod @@ -506,7 +510,18 @@ def get_variables(self, filters): def get_register_info(self): if self.register_info is None: - self.register_info = self.originating_thread.get_register_info() + if isinstance(self.originating_thread, ScriptedThread): + self.register_info = self.originating_thread.get_register_info() + elif isinstance(self.originating_thread, lldb.SBThread): + self.register_info = dict() + if "x86_64" in self.arch: + self.register_info["sets"] = ["General Purpose Registers"] + self.register_info["registers"] = INTEL64_GPR + elif "arm64" in self.arch or self.arch == "aarch64": + self.register_info["sets"] = ["General Purpose Registers"] + self.register_info["registers"] = ARM64_GPR + else: + raise ValueError("Unknown architecture", self.arch) return self.register_info @abstractmethod @@ -640,12 +655,12 @@ def get_stop_reason(self): # TODO: Passthrough stop reason from driving process if self.driving_thread.GetStopReason() != lldb.eStopReasonNone: - if "arm64" in self.originating_process.arch: + if "arm64" in self.arch: stop_reason["type"] = lldb.eStopReasonException stop_reason["data"]["desc"] = ( 
self.driving_thread.GetStopDescription(100) ) - elif self.originating_process.arch == "x86_64": + elif self.arch == "x86_64": stop_reason["type"] = lldb.eStopReasonSignal stop_reason["data"]["signal"] = signal.SIGTRAP else: diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index ce81ae46a0905..0318492f1054c 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -19,6 +19,7 @@ #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBStatisticsOptions.h" #include "lldb/API/SBSymbolContextList.h" +#include "lldb/API/SBThreadCollection.h" #include "lldb/API/SBType.h" #include "lldb/API/SBValue.h" #include "lldb/API/SBWatchpoint.h" @@ -1003,6 +1004,35 @@ class LLDB_API SBTarget { lldb::SBMutex GetAPIMutex() const; + /// Register a scripted frame provider for this target. + /// If a scripted frame provider with the same name and same argument + /// dictionary is already registered on this target, it will be overwritten. + /// + /// \param[in] class_name + /// The name of the Python class that implements the frame provider. + /// + /// \param[in] args_dict + /// A dictionary of arguments to pass to the frame provider class. + /// + /// \param[out] error + /// An error object indicating success or failure. + /// + /// \return + /// A unique identifier for the frame provider descriptor that was + /// registered. 0 if the registration failed. + uint32_t RegisterScriptedFrameProvider(const char *class_name, + lldb::SBStructuredData args_dict, + lldb::SBError &error); + + /// Remove a scripted frame provider from this target by name. + /// + /// \param[in] provider_id + /// The id of the frame provider class to remove. + /// + /// \return + /// An error object indicating success or failure. 
+ lldb::SBError RemoveScriptedFrameProvider(uint32_t provider_id); + protected: friend class SBAddress; friend class SBAddressRange; diff --git a/lldb/include/lldb/API/SBThread.h b/lldb/include/lldb/API/SBThread.h index f6a6d19935b83..639e7a0a1a5c0 100644 --- a/lldb/include/lldb/API/SBThread.h +++ b/lldb/include/lldb/API/SBThread.h @@ -256,6 +256,7 @@ class LLDB_API SBThread { friend class SBThreadPlan; friend class SBTrace; + friend class lldb_private::ScriptInterpreter; friend class lldb_private::python::SWIGBridge; SBThread(const lldb::ThreadSP &lldb_object_sp); diff --git a/lldb/include/lldb/API/SBThreadCollection.h b/lldb/include/lldb/API/SBThreadCollection.h index 5a052e6246026..d13dea0f11cd2 100644 --- a/lldb/include/lldb/API/SBThreadCollection.h +++ b/lldb/include/lldb/API/SBThreadCollection.h @@ -46,6 +46,7 @@ class LLDB_API SBThreadCollection { void SetOpaque(const lldb::ThreadCollectionSP &threads); private: + friend class SBTarget; friend class SBProcess; friend class SBThread; friend class SBSaveCoreOptions; diff --git a/lldb/include/lldb/Core/FormatEntity.h b/lldb/include/lldb/Core/FormatEntity.h index 40916dc48a70b..107c30a000979 100644 --- a/lldb/include/lldb/Core/FormatEntity.h +++ b/lldb/include/lldb/Core/FormatEntity.h @@ -81,6 +81,7 @@ struct Entry { FrameRegisterByName, FrameIsArtificial, FrameKind, + FrameBorrowedInfo, ScriptFrame, FunctionID, FunctionDidChange, diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h index 2d9f713676f90..49b60131399d5 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h @@ -16,11 +16,29 @@ namespace lldb_private { class ScriptedFrameProviderInterface : public ScriptedInterface { public: + virtual bool AppliesToThread(llvm::StringRef class_name, + lldb::ThreadSP thread_sp) { + return true; + } 
+ virtual llvm::Expected CreatePluginObject(llvm::StringRef class_name, lldb::StackFrameListSP input_frames, StructuredData::DictionarySP args_sp) = 0; + /// Get a description string for the frame provider. + /// + /// This is called by the descriptor to fetch a description from the + /// scripted implementation. Implementations should call a static method + /// on the scripting class to retrieve the description. + /// + /// \param class_name The name of the scripting class implementing the + /// provider. + /// + /// \return A string describing what this frame provider does, or an + /// empty string if no description is available. + virtual std::string GetDescription(llvm::StringRef class_name) { return {}; } + virtual StructuredData::ObjectSP GetFrameAtIndex(uint32_t index) { return {}; } diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 7fed4940b85bf..0b91d6756552d 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -21,6 +21,7 @@ #include "lldb/API/SBMemoryRegionInfo.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBSymbolContext.h" +#include "lldb/API/SBThread.h" #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Core/SearchFilter.h" @@ -580,6 +581,8 @@ class ScriptInterpreter : public PluginInterface { lldb::StreamSP GetOpaqueTypeFromSBStream(const lldb::SBStream &stream) const; + lldb::ThreadSP GetOpaqueTypeFromSBThread(const lldb::SBThread &exe_ctx) const; + lldb::StackFrameSP GetOpaqueTypeFromSBFrame(const lldb::SBFrame &frame) const; SymbolContext diff --git a/lldb/include/lldb/Target/BorrowedStackFrame.h b/lldb/include/lldb/Target/BorrowedStackFrame.h new file mode 100644 index 0000000000000..72e7777961da7 --- /dev/null +++ b/lldb/include/lldb/Target/BorrowedStackFrame.h @@ -0,0 +1,146 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_TARGET_BORROWEDSTACKFRAME_H +#define LLDB_TARGET_BORROWEDSTACKFRAME_H + +#include "lldb/Target/StackFrame.h" + +namespace lldb_private { + +/// \class BorrowedStackFrame BorrowedStackFrame.h +/// "lldb/Target/BorrowedStackFrame.h" +/// +/// A wrapper around an existing StackFrame that supersedes its frame indices. +/// +/// This class is useful when you need to present an existing stack frame +/// with a different index, such as when creating synthetic frame views or +/// renumbering frames without copying all the underlying data. +/// +/// All methods delegate to the borrowed frame except for GetFrameIndex() +/// & GetConcreteFrameIndex() which uses the overridden indices. +class BorrowedStackFrame : public StackFrame { +public: + /// Construct a BorrowedStackFrame that wraps an existing frame. + /// + /// \param [in] borrowed_frame_sp + /// The existing StackFrame to borrow from. This frame's data will be + /// used for all operations except frame index queries. + /// + /// \param [in] new_frame_index + /// The frame index to report instead of the borrowed frame's index. + /// + /// \param [in] new_concrete_frame_index + /// Optional concrete frame index. If not provided, defaults to + /// new_frame_index. + BorrowedStackFrame( + lldb::StackFrameSP borrowed_frame_sp, uint32_t new_frame_index, + std::optional new_concrete_frame_index = std::nullopt); + + ~BorrowedStackFrame() override = default; + + uint32_t GetFrameIndex() const override; + void SetFrameIndex(uint32_t index); + + /// Get the concrete frame index for this borrowed frame. 
+ /// + /// Returns the overridden concrete frame index provided at construction, + /// or LLDB_INVALID_FRAME_ID if the borrowed frame represents an inlined + /// function, since this would require some computation if we chain inlined + /// borrowed stack frames. + /// + /// \return + /// The concrete frame index, or LLDB_INVALID_FRAME_ID for inline frames. + uint32_t GetConcreteFrameIndex() override; + + StackID &GetStackID() override; + + const Address &GetFrameCodeAddress() override; + + Address GetFrameCodeAddressForSymbolication() override; + + bool ChangePC(lldb::addr_t pc) override; + + const SymbolContext & + GetSymbolContext(lldb::SymbolContextItem resolve_scope) override; + + llvm::Error GetFrameBaseValue(Scalar &value) override; + + DWARFExpressionList *GetFrameBaseExpression(Status *error_ptr) override; + + Block *GetFrameBlock() override; + + lldb::RegisterContextSP GetRegisterContext() override; + + VariableList *GetVariableList(bool get_file_globals, + Status *error_ptr) override; + + lldb::VariableListSP + GetInScopeVariableList(bool get_file_globals, + bool must_have_valid_location = false) override; + + lldb::ValueObjectSP GetValueForVariableExpressionPath( + llvm::StringRef var_expr, lldb::DynamicValueType use_dynamic, + uint32_t options, lldb::VariableSP &var_sp, Status &error) override; + + bool HasDebugInformation() override; + + const char *Disassemble() override; + + lldb::ValueObjectSP + GetValueObjectForFrameVariable(const lldb::VariableSP &variable_sp, + lldb::DynamicValueType use_dynamic) override; + + bool IsInlined() override; + + bool IsSynthetic() const override; + + bool IsHistorical() const override; + + bool IsArtificial() const override; + + bool IsHidden() override; + + const char *GetFunctionName() override; + + const char *GetDisplayFunctionName() override; + + lldb::ValueObjectSP FindVariable(ConstString name) override; + + SourceLanguage GetLanguage() override; + + SourceLanguage GuessLanguage() override; + + 
lldb::ValueObjectSP GuessValueForAddress(lldb::addr_t addr) override; + + lldb::ValueObjectSP GuessValueForRegisterAndOffset(ConstString reg, + int64_t offset) override; + + StructuredData::ObjectSP GetLanguageSpecificData() override; + + lldb::RecognizedStackFrameSP GetRecognizedFrame() override; + + /// Get the underlying borrowed frame. + lldb::StackFrameSP GetBorrowedFrame() const; + + bool isA(const void *ClassID) const override; + static bool classof(const StackFrame *obj); + +private: + lldb::StackFrameSP m_borrowed_frame_sp; + uint32_t m_new_frame_index; + uint32_t m_new_concrete_frame_index; + static char ID; + + BorrowedStackFrame(const BorrowedStackFrame &) = delete; + const BorrowedStackFrame &operator=(const BorrowedStackFrame &) = delete; +}; + +} // namespace lldb_private + +#endif // LLDB_TARGET_BORROWEDSTACKFRAME_H diff --git a/lldb/include/lldb/Target/StackFrame.h b/lldb/include/lldb/Target/StackFrame.h index 135bd81e4e8d4..46922448d6e59 100644 --- a/lldb/include/lldb/Target/StackFrame.h +++ b/lldb/include/lldb/Target/StackFrame.h @@ -43,6 +43,13 @@ namespace lldb_private { class StackFrame : public ExecutionContextScope, public std::enable_shared_from_this { public: + /// LLVM RTTI support. + /// \{ + static char ID; + virtual bool isA(const void *ClassID) const { return ClassID == &ID; } + static bool classof(const StackFrame *obj) { return obj->isA(&ID); } + /// \} + enum ExpressionPathOption { eExpressionPathOptionCheckPtrVsMember = (1u << 0), eExpressionPathOptionsNoFragileObjcIvar = (1u << 1), @@ -127,7 +134,7 @@ class StackFrame : public ExecutionContextScope, lldb::ThreadSP GetThread() const { return m_thread_wp.lock(); } - StackID &GetStackID(); + virtual StackID &GetStackID(); /// Get an Address for the current pc value in this StackFrame. /// @@ -135,7 +142,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The Address object set to the current PC value. 
- const Address &GetFrameCodeAddress(); + virtual const Address &GetFrameCodeAddress(); /// Get the current code Address suitable for symbolication, /// may not be the same as GetFrameCodeAddress(). @@ -153,7 +160,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The Address object set to the current PC value. - Address GetFrameCodeAddressForSymbolication(); + virtual Address GetFrameCodeAddressForSymbolication(); /// Change the pc value for a given thread. /// @@ -165,7 +172,7 @@ class StackFrame : public ExecutionContextScope, /// \return /// true if the pc was changed. false if this failed -- possibly /// because this frame is not a live StackFrame. - bool ChangePC(lldb::addr_t pc); + virtual bool ChangePC(lldb::addr_t pc); /// Provide a SymbolContext for this StackFrame's current pc value. /// @@ -181,7 +188,8 @@ class StackFrame : public ExecutionContextScope, /// \return /// A SymbolContext reference which includes the types of information /// requested by resolve_scope, if they are available. - const SymbolContext &GetSymbolContext(lldb::SymbolContextItem resolve_scope); + virtual const SymbolContext & + GetSymbolContext(lldb::SymbolContextItem resolve_scope); /// Return the Canonical Frame Address (DWARF term) for this frame. /// @@ -199,7 +207,7 @@ class StackFrame : public ExecutionContextScope, /// \return /// If there is an error determining the CFA address, return an error /// explaining the failure. Success otherwise. - llvm::Error GetFrameBaseValue(Scalar &value); + virtual llvm::Error GetFrameBaseValue(Scalar &value); /// Get the DWARFExpressionList corresponding to the Canonical Frame Address. /// @@ -211,7 +219,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// Returns the corresponding DWARF expression, or NULL. 
- DWARFExpressionList *GetFrameBaseExpression(Status *error_ptr); + virtual DWARFExpressionList *GetFrameBaseExpression(Status *error_ptr); /// Get the current lexical scope block for this StackFrame, if possible. /// @@ -221,7 +229,7 @@ class StackFrame : public ExecutionContextScope, /// \return /// A pointer to the current Block. nullptr is returned if this can /// not be provided. - Block *GetFrameBlock(); + virtual Block *GetFrameBlock(); /// Get the RegisterContext for this frame, if possible. /// @@ -235,7 +243,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The RegisterContext shared point for this frame. - lldb::RegisterContextSP GetRegisterContext(); + virtual lldb::RegisterContextSP GetRegisterContext(); const lldb::RegisterContextSP &GetRegisterContextSP() const { return m_reg_context_sp; @@ -261,7 +269,8 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// A pointer to a list of variables. - VariableList *GetVariableList(bool get_file_globals, Status *error_ptr); + virtual VariableList *GetVariableList(bool get_file_globals, + Status *error_ptr); /// Retrieve the list of variables that are in scope at this StackFrame's /// pc. @@ -280,7 +289,7 @@ class StackFrame : public ExecutionContextScope, /// StackFrame's pc. /// \return /// A pointer to a list of variables. - lldb::VariableListSP + virtual lldb::VariableListSP GetInScopeVariableList(bool get_file_globals, bool must_have_valid_location = false); @@ -309,7 +318,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// A shared pointer to the ValueObject described by var_expr. 
- lldb::ValueObjectSP GetValueForVariableExpressionPath( + virtual lldb::ValueObjectSP GetValueForVariableExpressionPath( llvm::StringRef var_expr, lldb::DynamicValueType use_dynamic, uint32_t options, lldb::VariableSP &var_sp, Status &error); @@ -318,14 +327,14 @@ class StackFrame : public ExecutionContextScope, /// \return /// true if debug information is available for this frame (function, /// compilation unit, block, etc.) - bool HasDebugInformation(); + virtual bool HasDebugInformation(); /// Return the disassembly for the instructions of this StackFrame's /// function as a single C string. /// /// \return /// C string with the assembly instructions for this function. - const char *Disassemble(); + virtual const char *Disassemble(); /// Print a description of this frame using the provided frame format. /// @@ -337,9 +346,9 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// \b true if and only if dumping with the given \p format worked. - bool DumpUsingFormat(Stream &strm, - const lldb_private::FormatEntity::Entry *format, - llvm::StringRef frame_marker = {}); + virtual bool DumpUsingFormat(Stream &strm, + const lldb_private::FormatEntity::Entry *format, + llvm::StringRef frame_marker = {}); /// Print a description for this frame using the frame-format formatter /// settings. If the current frame-format settings are invalid, then the @@ -353,8 +362,8 @@ class StackFrame : public ExecutionContextScope, /// /// \param [in] frame_marker /// Optional string that will be prepended to the frame output description. - void DumpUsingSettingsFormat(Stream *strm, bool show_unique = false, - const char *frame_marker = nullptr); + virtual void DumpUsingSettingsFormat(Stream *strm, bool show_unique = false, + const char *frame_marker = nullptr); /// Print a description for this frame using a default format. 
/// @@ -366,7 +375,7 @@ class StackFrame : public ExecutionContextScope, /// /// \param [in] show_fullpaths /// Whether to print the full source paths or just the file base name. - void Dump(Stream *strm, bool show_frame_index, bool show_fullpaths); + virtual void Dump(Stream *strm, bool show_frame_index, bool show_fullpaths); /// Print a description of this stack frame and/or the source /// context/assembly for this stack frame. @@ -389,8 +398,9 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// Returns true if successful. - bool GetStatus(Stream &strm, bool show_frame_info, bool show_source, - bool show_unique = false, const char *frame_marker = nullptr); + virtual bool GetStatus(Stream &strm, bool show_frame_info, bool show_source, + bool show_unique = false, + const char *frame_marker = nullptr); /// Query whether this frame is a concrete frame on the call stack, or if it /// is an inlined frame derived from the debug information and presented by @@ -401,10 +411,10 @@ class StackFrame : public ExecutionContextScope, virtual bool IsInlined(); /// Query whether this frame is synthetic. - bool IsSynthetic() const; + virtual bool IsSynthetic() const; /// Query whether this frame is part of a historical backtrace. - bool IsHistorical() const; + virtual bool IsHistorical() const; /// Query whether this frame is artificial (e.g a synthesized result of /// inferring missing tail call frames from a backtrace). Artificial frames @@ -419,7 +429,7 @@ class StackFrame : public ExecutionContextScope, /// Language plugins can use this API to report language-specific /// runtime information about this compile unit, such as additional /// language version details or feature flags. - StructuredData::ObjectSP GetLanguageSpecificData(); + virtual StructuredData::ObjectSP GetLanguageSpecificData(); /// Get the frame's demangled name. 
/// @@ -439,9 +449,9 @@ class StackFrame : public ExecutionContextScope, /// \return /// StackFrame index 0 indicates the currently-executing function. Inline /// frames are included in this frame index count. - uint32_t GetFrameIndex() const; + virtual uint32_t GetFrameIndex() const; - /// Set this frame's synthetic frame index. + /// Set this frame's frame index. void SetFrameIndex(uint32_t index) { m_frame_index = index; } /// Query this frame to find what frame it is in this Thread's @@ -452,7 +462,7 @@ class StackFrame : public ExecutionContextScope, /// frames are not included in this frame index count; their concrete /// frame index will be the same as the concrete frame that they are /// derived from. - uint32_t GetConcreteFrameIndex() const { return m_concrete_frame_index; } + virtual uint32_t GetConcreteFrameIndex() { return m_concrete_frame_index; } /// Create a ValueObject for a given Variable in this StackFrame. /// @@ -466,7 +476,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// A ValueObject for this variable. - lldb::ValueObjectSP + virtual lldb::ValueObjectSP GetValueObjectForFrameVariable(const lldb::VariableSP &variable_sp, lldb::DynamicValueType use_dynamic); @@ -474,11 +484,11 @@ class StackFrame : public ExecutionContextScope, /// parsing expressions given the execution context. /// /// \return The language of the frame if known. - SourceLanguage GetLanguage(); + virtual SourceLanguage GetLanguage(); /// Similar to GetLanguage(), but is allowed to take a potentially incorrect /// guess if exact information is not available. - SourceLanguage GuessLanguage(); + virtual SourceLanguage GuessLanguage(); /// Attempt to reconstruct the ValueObject for a given raw address touched by /// the current instruction. The ExpressionPath should indicate how to get /// to this value using "frame variable." /// /// \param [in] addr /// The raw address. /// /// \return /// The ValueObject if found. If valid, it has a valid ExpressionPath.
- lldb::ValueObjectSP GuessValueForAddress(lldb::addr_t addr); + virtual lldb::ValueObjectSP GuessValueForAddress(lldb::addr_t addr); /// Attempt to reconstruct the ValueObject for the address contained in a /// given register plus an offset. The ExpressionPath should indicate how @@ -503,8 +513,8 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The ValueObject if found. If valid, it has a valid ExpressionPath. - lldb::ValueObjectSP GuessValueForRegisterAndOffset(ConstString reg, - int64_t offset); + virtual lldb::ValueObjectSP GuessValueForRegisterAndOffset(ConstString reg, + int64_t offset); /// Attempt to reconstruct the ValueObject for a variable with a given \a name /// from within the current StackFrame, within the current block. The search @@ -517,7 +527,7 @@ class StackFrame : public ExecutionContextScope, /// /// \return /// The ValueObject if found. - lldb::ValueObjectSP FindVariable(ConstString name); + virtual lldb::ValueObjectSP FindVariable(ConstString name); // lldb::ExecutionContextScope pure virtual functions lldb::TargetSP CalculateTarget() override; @@ -530,7 +540,7 @@ class StackFrame : public ExecutionContextScope, void CalculateExecutionContext(ExecutionContext &exe_ctx) override; - lldb::RecognizedStackFrameSP GetRecognizedFrame(); + virtual lldb::RecognizedStackFrameSP GetRecognizedFrame(); /// Get the StackFrameList that contains this frame. 
/// @@ -546,7 +556,9 @@ class StackFrame : public ExecutionContextScope, } protected: + friend class BorrowedStackFrame; friend class StackFrameList; + friend class SyntheticStackFrameList; void SetSymbolContextScope(SymbolContextScope *symbol_scope); diff --git a/lldb/include/lldb/Target/StackFrameList.h b/lldb/include/lldb/Target/StackFrameList.h index 8c14e92a41a4e..539c070ff0f4b 100644 --- a/lldb/include/lldb/Target/StackFrameList.h +++ b/lldb/include/lldb/Target/StackFrameList.h @@ -26,7 +26,7 @@ class StackFrameList : public std::enable_shared_from_this { StackFrameList(Thread &thread, const lldb::StackFrameListSP &prev_frames_sp, bool show_inline_frames); - ~StackFrameList(); + virtual ~StackFrameList(); /// Get the number of visible frames. Frames may be created if \p can_create /// is true. Synthetic (inline) frames expanded from the concrete frame #0 @@ -106,6 +106,7 @@ class StackFrameList : public std::enable_shared_from_this { protected: friend class Thread; + friend class ScriptedFrameProvider; friend class ScriptedThread; /// Use this API to build a stack frame list (used for scripted threads, for @@ -211,19 +212,23 @@ class StackFrameList : public std::enable_shared_from_this { /// Whether or not to show synthetic (inline) frames. Immutable. const bool m_show_inlined_frames; + /// Returns true if fetching frames was interrupted, false otherwise. + virtual bool FetchFramesUpTo(uint32_t end_idx, + InterruptionControl allow_interrupt); + private: uint32_t SetSelectedFrameNoLock(lldb_private::StackFrame *frame); lldb::StackFrameSP GetFrameAtIndexNoLock(uint32_t idx, std::shared_lock &guard); + /// @{ /// These two Fetch frames APIs and SynthesizeTailCallFrames are called in /// GetFramesUpTo, they are the ones that actually add frames. They must be /// called with the writer end of the list mutex held. - - /// Returns true if fetching frames was interrupted, false otherwise. 
- bool FetchFramesUpTo(uint32_t end_idx, InterruptionControl allow_interrupt); + /// /// Not currently interruptible so returns void. + /// @} void FetchOnlyConcreteFramesUpTo(uint32_t end_idx); void SynthesizeTailCallFrames(StackFrame &next_frame); @@ -231,6 +236,27 @@ const StackFrameList &operator=(const StackFrameList &) = delete; }; +/// A StackFrameList that wraps another StackFrameList and uses a +/// SyntheticFrameProvider to lazily provide frames from either the provider +/// or the underlying real stack frame list. +class SyntheticStackFrameList : public StackFrameList {
+public: + SyntheticStackFrameList(Thread &thread, lldb::StackFrameListSP input_frames, + const lldb::StackFrameListSP &prev_frames_sp, + bool show_inline_frames); +
+protected: + /// Override FetchFramesUpTo to lazily return frames from the provider + /// or from the actual stack frame list. + bool FetchFramesUpTo(uint32_t end_idx, + InterruptionControl allow_interrupt) override; +
+private: + /// The input stack frame list that the provider transforms. + /// This could be a real StackFrameList or another SyntheticStackFrameList. + lldb::StackFrameListSP m_input_frames; +};
+
} // namespace lldb_private

#endif // LLDB_TARGET_STACKFRAMELIST_H diff --git a/lldb/include/lldb/Target/SyntheticFrameProvider.h b/lldb/include/lldb/Target/SyntheticFrameProvider.h index 61a492f356ece..2d5330cb03105 100644 --- a/lldb/include/lldb/Target/SyntheticFrameProvider.h +++ b/lldb/include/lldb/Target/SyntheticFrameProvider.h @@ -24,22 +24,25 @@ namespace lldb_private { /// This struct contains the metadata needed to instantiate a frame provider /// and optional filters to control which threads it applies to. -struct SyntheticFrameProviderDescriptor { +struct ScriptedFrameProviderDescriptor { /// Metadata for instantiating the provider (e.g. script class name and args).
lldb::ScriptedMetadataSP scripted_metadata_sp; + /// Interface for calling static methods on the provider class. + lldb::ScriptedFrameProviderInterfaceSP interface_sp; + /// Optional list of thread specifications to which this provider applies. /// If empty, the provider applies to all threads. A thread matches if it /// satisfies ANY of the specs in this vector (OR logic). std::vector thread_specs; - SyntheticFrameProviderDescriptor() = default; + ScriptedFrameProviderDescriptor() = default; - SyntheticFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp) + ScriptedFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp) : scripted_metadata_sp(metadata_sp) {} - SyntheticFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp, - const std::vector &specs) + ScriptedFrameProviderDescriptor(lldb::ScriptedMetadataSP metadata_sp, + const std::vector &specs) : scripted_metadata_sp(metadata_sp), thread_specs(specs) {} /// Get the name of this descriptor (the scripted class name). @@ -47,6 +50,12 @@ struct SyntheticFrameProviderDescriptor { return scripted_metadata_sp ? scripted_metadata_sp->GetClassName() : ""; } + /// Get the description of this frame provider. + /// + /// \return A string describing what this frame provider does, or an + /// empty string if no description is available. + std::string GetDescription() const; + /// Check if this descriptor applies to the given thread. bool AppliesToThread(Thread &thread) const { // If no thread specs specified, applies to all threads. @@ -64,6 +73,13 @@ struct SyntheticFrameProviderDescriptor { /// Check if this descriptor has valid metadata for script-based providers. bool IsValid() const { return scripted_metadata_sp != nullptr; } + /// Get a unique identifier for this descriptor based on its contents. + /// The ID is computed from the class name and arguments dictionary, + /// not from the pointer address, so two descriptors with the same + /// contents will have the same ID. 
+ uint32_t GetID() const; + + /// Dump a description of this descriptor to the given stream. void Dump(Stream *s) const; }; @@ -95,7 +111,7 @@ class SyntheticFrameProvider : public PluginInterface { /// otherwise an \a llvm::Error. static llvm::Expected CreateInstance(lldb::StackFrameListSP input_frames, - const SyntheticFrameProviderDescriptor &descriptor); + const ScriptedFrameProviderDescriptor &descriptor); /// Try to create a SyntheticFrameProvider instance for the given input /// frames using a specific C++ plugin. @@ -125,6 +141,8 @@ class SyntheticFrameProvider : public PluginInterface { ~SyntheticFrameProvider() override; + virtual std::string GetDescription() const = 0; + /// Get a single stack frame at the specified index. /// /// This method is called lazily - frames are only created when requested. diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index c0fcda7c0d960..812a638910b3b 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -32,6 +32,7 @@ #include "lldb/Target/PathMappingList.h" #include "lldb/Target/SectionLoadHistory.h" #include "lldb/Target/Statistics.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/ThreadSpec.h" #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/Broadcaster.h" @@ -745,6 +746,36 @@ class Target : public std::enable_shared_from_this, Status Attach(ProcessAttachInfo &attach_info, Stream *stream); // Optional stream to receive first stop info + /// Add or update a scripted frame provider descriptor for this target. + /// All new threads in this target will check if they match any descriptors + /// to create their frame providers. + /// + /// \param[in] descriptor + /// The descriptor to add or update. + /// + /// \return + /// The descriptor identifier if the registration succeeded, otherwise an + /// llvm::Error. 
+ llvm::Expected AddScriptedFrameProviderDescriptor( + const ScriptedFrameProviderDescriptor &descriptor); + + /// Remove a scripted frame provider descriptor by id. + /// + /// \param[in] id + /// The id of the descriptor to remove. + /// + /// \return + /// True if a descriptor was removed, false if no descriptor with that + /// id existed. + bool RemoveScriptedFrameProviderDescriptor(uint32_t id); + + /// Clear all scripted frame provider descriptors for this target. + void ClearScriptedFrameProviderDescriptors(); + + /// Get all scripted frame provider descriptors for this target. + const llvm::DenseMap & + GetScriptedFrameProviderDescriptors() const; + // This part handles the breakpoints. BreakpointList &GetBreakpointList(bool internal = false); @@ -1744,6 +1775,13 @@ class Target : public std::enable_shared_from_this, PathMappingList m_image_search_paths; TypeSystemMap m_scratch_type_system_map; + /// Map of scripted frame provider descriptors for this target. + /// Keys are the provider descriptors ids, values are the descriptors. + /// Used to initialize frame providers for new threads. 
+ llvm::DenseMap + m_frame_provider_descriptors; + mutable std::recursive_mutex m_frame_provider_descriptors_mutex; + typedef std::map REPLMap; REPLMap m_repl_map; diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h index 841f80cd1b1eb..46ce192556756 100644 --- a/lldb/include/lldb/Target/Thread.h +++ b/lldb/include/lldb/Target/Thread.h @@ -1297,6 +1297,15 @@ class Thread : public std::enable_shared_from_this, lldb::StackFrameListSP GetStackFrameList(); + llvm::Error + LoadScriptedFrameProvider(const ScriptedFrameProviderDescriptor &descriptor); + + void ClearScriptedFrameProvider(); + + lldb::SyntheticFrameProviderSP GetFrameProvider() const { + return m_frame_provider_sp; + } + protected: friend class ThreadPlan; friend class ThreadList; @@ -1400,6 +1409,9 @@ class Thread : public std::enable_shared_from_this, /// The Thread backed by this thread, if any. lldb::ThreadWP m_backed_thread; + /// The Scripted Frame Provider, if any. + lldb::SyntheticFrameProviderSP m_frame_provider_sp; + private: bool m_extended_info_fetched; // Have we tried to retrieve the m_extended_info // for this thread? 
diff --git a/lldb/include/lldb/Target/ThreadSpec.h b/lldb/include/lldb/Target/ThreadSpec.h index 7c7c832741196..63f8f8b5ec181 100644 --- a/lldb/include/lldb/Target/ThreadSpec.h +++ b/lldb/include/lldb/Target/ThreadSpec.h @@ -34,6 +34,8 @@ class ThreadSpec { public: ThreadSpec(); + ThreadSpec(Thread &thread); + static std::unique_ptr CreateFromStructuredData(const StructuredData::Dictionary &data_dict, Status &error); diff --git a/lldb/include/lldb/Utility/ScriptedMetadata.h b/lldb/include/lldb/Utility/ScriptedMetadata.h index 69c83edce909a..8523c95429718 100644 --- a/lldb/include/lldb/Utility/ScriptedMetadata.h +++ b/lldb/include/lldb/Utility/ScriptedMetadata.h @@ -10,7 +10,9 @@ #define LLDB_INTERPRETER_SCRIPTEDMETADATA_H #include "lldb/Utility/ProcessInfo.h" +#include "lldb/Utility/StreamString.h" #include "lldb/Utility/StructuredData.h" +#include "llvm/ADT/Hashing.h" namespace lldb_private { class ScriptedMetadata { @@ -27,11 +29,36 @@ class ScriptedMetadata { } } + ScriptedMetadata(const ScriptedMetadata &other) + : m_class_name(other.m_class_name), m_args_sp(other.m_args_sp) {} + explicit operator bool() const { return !m_class_name.empty(); } llvm::StringRef GetClassName() const { return m_class_name; } StructuredData::DictionarySP GetArgsSP() const { return m_args_sp; } + /// Get a unique identifier for this metadata based on its contents. + /// The ID is computed from the class name and arguments dictionary, + /// not from the pointer address, so two metadata objects with the same + /// contents will have the same ID. + uint32_t GetID() const { + if (m_class_name.empty()) + return 0; + + // Hash the class name. + llvm::hash_code hash = llvm::hash_value(m_class_name); + + // Hash the arguments dictionary if present. + if (m_args_sp) { + StreamString ss; + m_args_sp->GetDescription(ss); + hash = llvm::hash_combine(hash, llvm::hash_value(ss.GetData())); + } + + // Return the lower 32 bits of the hash. 
+ return static_cast(hash); + } + private: std::string m_class_name; StructuredData::DictionarySP m_args_sp; diff --git a/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h b/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h index 063d796ee4eec..1a82fd78bbba3 100644 --- a/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h +++ b/lldb/include/lldb/ValueObject/ValueObjectSynthetic.h @@ -123,6 +123,11 @@ class ValueObjectSynthetic : public ValueObject { void SetLanguageFlags(uint64_t flags) override; + void + GetExpressionPath(Stream &stream, + GetExpressionPathFormat epformat = + eGetExpressionPathFormatDereferencePointers) override; + protected: bool UpdateValue() override; diff --git a/lldb/include/lldb/lldb-private-interfaces.h b/lldb/include/lldb/lldb-private-interfaces.h index 5fc5c14c52f9e..52806eea190a7 100644 --- a/lldb/include/lldb/lldb-private-interfaces.h +++ b/lldb/include/lldb/lldb-private-interfaces.h @@ -26,7 +26,7 @@ class Value; namespace lldb_private { class ScriptedInterfaceUsages; -struct SyntheticFrameProviderDescriptor; +struct ScriptedFrameProviderDescriptor; typedef lldb::ABISP (*ABICreateInstance)(lldb::ProcessSP process_sp, const ArchSpec &arch); typedef std::unique_ptr (*ArchitectureCreateInstance)( @@ -91,7 +91,7 @@ typedef lldb::ScriptInterpreterSP (*ScriptInterpreterCreateInstance)( typedef llvm::Expected ( *ScriptedFrameProviderCreateInstance)( lldb::StackFrameListSP input_frames, - const lldb_private::SyntheticFrameProviderDescriptor &descriptor); + const lldb_private::ScriptedFrameProviderDescriptor &descriptor); typedef llvm::Expected ( *SyntheticFrameProviderCreateInstance)( lldb::StackFrameListSP input_frames, diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 578a7bdf7433d..78c2d49d647b5 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -23,6 +23,7 @@ #include "lldb/API/SBStringList.h" #include "lldb/API/SBStructuredData.h" #include "lldb/API/SBSymbolContextList.h" 
+#include "lldb/API/SBThreadCollection.h" #include "lldb/API/SBTrace.h" #include "lldb/Breakpoint/BreakpointID.h" #include "lldb/Breakpoint/BreakpointIDList.h" @@ -39,6 +40,7 @@ #include "lldb/Core/Section.h" #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Host/Host.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include "lldb/Symbol/DeclVendor.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/SymbolFile.h" @@ -50,6 +52,7 @@ #include "lldb/Target/LanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/StackFrame.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/Target.h" #include "lldb/Target/TargetList.h" #include "lldb/Utility/ArchSpec.h" @@ -59,6 +62,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/ProcessInfo.h" #include "lldb/Utility/RegularExpression.h" +#include "lldb/Utility/ScriptedMetadata.h" #include "lldb/ValueObject/ValueObjectConstResult.h" #include "lldb/ValueObject/ValueObjectList.h" #include "lldb/ValueObject/ValueObjectVariable.h" @@ -2435,3 +2439,81 @@ lldb::SBMutex SBTarget::GetAPIMutex() const { return lldb::SBMutex(target_sp); return lldb::SBMutex(); } + +uint32_t +SBTarget::RegisterScriptedFrameProvider(const char *class_name, + lldb::SBStructuredData args_dict, + lldb::SBError &error) { + LLDB_INSTRUMENT_VA(this, class_name, args_dict, error); + + TargetSP target_sp = GetSP(); + if (!target_sp) { + error.SetErrorString("invalid target"); + return 0; + } + + if (!class_name || !class_name[0]) { + error.SetErrorString("invalid class name"); + return 0; + } + + // Extract the dictionary from SBStructuredData. 
+ StructuredData::DictionarySP dict_sp; + if (args_dict.IsValid() && args_dict.m_impl_up) { + StructuredData::ObjectSP obj_sp = args_dict.m_impl_up->GetObjectSP(); + if (obj_sp && obj_sp->GetType() != lldb::eStructuredDataTypeDictionary) { + error.SetErrorString("SBStructuredData argument isn't a dictionary"); + return 0; + } + dict_sp = std::make_shared(obj_sp); + } + + // Create the ScriptedMetadata. + ScriptedMetadataSP metadata_sp = + std::make_shared(class_name, dict_sp); + + // Create the interface for calling static methods. + ScriptedFrameProviderInterfaceSP interface_sp = + target_sp->GetDebugger() + .GetScriptInterpreter() + ->CreateScriptedFrameProviderInterface(); + + // Create a descriptor (applies to all threads by default). + ScriptedFrameProviderDescriptor descriptor(metadata_sp); + descriptor.interface_sp = interface_sp; + + llvm::Expected descriptor_id_or_err = + target_sp->AddScriptedFrameProviderDescriptor(descriptor); + if (!descriptor_id_or_err) { + error.SetErrorString( + llvm::toString(descriptor_id_or_err.takeError()).c_str()); + return 0; + } + + // Return the id of the descriptor registered with the target.
+ return *descriptor_id_or_err; +} + +lldb::SBError SBTarget::RemoveScriptedFrameProvider(uint32_t provider_id) { + LLDB_INSTRUMENT_VA(this, provider_id); + + SBError error; + TargetSP target_sp = GetSP(); + if (!target_sp) { + error.SetErrorString("invalid target"); + return error; + } + + if (!provider_id) { + error.SetErrorString("invalid provider id"); + return error; + } + + if (!target_sp->RemoveScriptedFrameProviderDescriptor(provider_id)) { + error.SetErrorStringWithFormat("no frame provider named '%u' found", + provider_id); + return error; + } + + return {}; +} diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 7f880d223d6c3..6e8c94fa234cd 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -51,6 +51,7 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/ScriptedMetadata.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StructuredData.h" @@ -5402,6 +5403,202 @@ class CommandObjectTargetDump : public CommandObjectMultiword { ~CommandObjectTargetDump() override = default; }; +#pragma mark CommandObjectTargetFrameProvider + +#define LLDB_OPTIONS_target_frame_provider_register +#include "CommandOptions.inc" + +class CommandObjectTargetFrameProviderRegister : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderRegister(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider register", + "Register frame provider for all threads in this target.", nullptr, + eCommandRequiresTarget), + + m_class_options("target frame-provider", true, 'C', 'k', 'v', 0) { + m_all_options.Append(&m_class_options, LLDB_OPT_SET_1 | LLDB_OPT_SET_2, + LLDB_OPT_SET_ALL); + m_all_options.Finalize(); + + AddSimpleArgumentList(eArgTypeRunArgs, eArgRepeatOptional); + } + + 
~CommandObjectTargetFrameProviderRegister() override = default; + + Options *GetOptions() override { return &m_all_options; } + + std::optional GetRepeatCommand(Args &current_command_args, + uint32_t index) override { + return std::string(""); + } + +protected: + void DoExecute(Args &launch_args, CommandReturnObject &result) override { + ScriptedMetadataSP metadata_sp = std::make_shared( + m_class_options.GetName(), m_class_options.GetStructuredData()); + + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) + target = &GetDebugger().GetDummyTarget(); + + // Create the interface for calling static methods. + ScriptedFrameProviderInterfaceSP interface_sp = + GetDebugger() + .GetScriptInterpreter() + ->CreateScriptedFrameProviderInterface(); + + // Create a descriptor from the metadata (applies to all threads by + // default). + ScriptedFrameProviderDescriptor descriptor(metadata_sp); + descriptor.interface_sp = interface_sp; + + auto id_or_err = target->AddScriptedFrameProviderDescriptor(descriptor); + if (!id_or_err) { + result.SetError(id_or_err.takeError()); + return; + } + + result.AppendMessageWithFormat( + "successfully registered scripted frame provider '%s' for target\n", + m_class_options.GetName().c_str()); + } + + OptionGroupPythonClassWithDict m_class_options; + OptionGroupOptions m_all_options; +}; + +class CommandObjectTargetFrameProviderClear : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderClear(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider clear", + "Clear all registered frame providers from this target.", nullptr, + eCommandRequiresTarget) {} + + ~CommandObjectTargetFrameProviderClear() override = default; + +protected: + void DoExecute(Args &command, CommandReturnObject &result) override { + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) { + result.AppendError("invalid target"); + return; + } + + target->ClearScriptedFrameProviderDescriptors(); + + 
result.SetStatus(eReturnStatusSuccessFinishResult); + } +}; + +class CommandObjectTargetFrameProviderList : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderList(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider list", + "List all registered frame providers for the target.", nullptr, + eCommandRequiresTarget) {} + + ~CommandObjectTargetFrameProviderList() override = default; + +protected: + void DoExecute(Args &command, CommandReturnObject &result) override { + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) + target = &GetDebugger().GetDummyTarget(); + + const auto &descriptors = target->GetScriptedFrameProviderDescriptors(); + if (descriptors.empty()) { + result.AppendMessage("no frame providers registered for this target."); + result.SetStatus(eReturnStatusSuccessFinishResult); + return; + } + + result.AppendMessageWithFormat("%u frame provider(s) registered:\n\n", + descriptors.size()); + + for (const auto &entry : descriptors) { + const ScriptedFrameProviderDescriptor &descriptor = entry.second; + descriptor.Dump(&result.GetOutputStream()); + result.GetOutputStream().PutChar('\n'); + } + + result.SetStatus(eReturnStatusSuccessFinishResult); + } +}; + +class CommandObjectTargetFrameProviderRemove : public CommandObjectParsed { +public: + CommandObjectTargetFrameProviderRemove(CommandInterpreter &interpreter) + : CommandObjectParsed( + interpreter, "target frame-provider remove", + "Remove a registered frame provider from the target by id.", + "target frame-provider remove ", + eCommandRequiresTarget) { + AddSimpleArgumentList(eArgTypeUnsignedInteger, eArgRepeatPlus); + } + + ~CommandObjectTargetFrameProviderRemove() override = default; + +protected: + void DoExecute(Args &command, CommandReturnObject &result) override { + Target *target = m_exe_ctx.GetTargetPtr(); + if (!target) + target = &GetDebugger().GetDummyTarget(); + + std::vector removed_provider_ids; + for (size_t i = 0; i < 
command.GetArgumentCount(); i++) { + uint32_t provider_id = 0; + if (!llvm::to_integer(command[i].ref(), provider_id)) { + result.AppendError("target frame-provider remove requires integer " + "provider id argument"); + return; + } + + if (!target->RemoveScriptedFrameProviderDescriptor(provider_id)) { + result.AppendErrorWithFormat( + "no frame provider named '%u' found in target\n", provider_id); + return; + } + removed_provider_ids.push_back(provider_id); + } + + if (size_t num_removed_providers = removed_provider_ids.size()) { + result.AppendMessageWithFormat( + "Successfully removed %zu frame-providers.\n", num_removed_providers); + result.SetStatus(eReturnStatusSuccessFinishNoResult); + } else { + result.AppendError("0 frame providers removed.\n"); + } + } +}; + +class CommandObjectTargetFrameProvider : public CommandObjectMultiword { +public: + CommandObjectTargetFrameProvider(CommandInterpreter &interpreter) + : CommandObjectMultiword( + interpreter, "target frame-provider", + "Commands for registering and viewing frame providers for the " + "target.", + "target frame-provider [] ") { + LoadSubCommand("register", + CommandObjectSP(new CommandObjectTargetFrameProviderRegister( + interpreter))); + LoadSubCommand("clear", + CommandObjectSP( + new CommandObjectTargetFrameProviderClear(interpreter))); + LoadSubCommand( + "list", + CommandObjectSP(new CommandObjectTargetFrameProviderList(interpreter))); + LoadSubCommand( + "remove", CommandObjectSP( + new CommandObjectTargetFrameProviderRemove(interpreter))); + } + + ~CommandObjectTargetFrameProvider() override = default; +}; + #pragma mark CommandObjectMultiwordTarget // CommandObjectMultiwordTarget @@ -5417,6 +5614,9 @@ CommandObjectMultiwordTarget::CommandObjectMultiwordTarget( CommandObjectSP(new CommandObjectTargetDelete(interpreter))); LoadSubCommand("dump", CommandObjectSP(new CommandObjectTargetDump(interpreter))); + LoadSubCommand( + "frame-provider", + CommandObjectSP(new 
CommandObjectTargetFrameProvider(interpreter))); LoadSubCommand("list", CommandObjectSP(new CommandObjectTargetList(interpreter))); LoadSubCommand("select", diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp index 491f5c6320d97..c528a14fa76d0 100644 --- a/lldb/source/Core/FormatEntity.cpp +++ b/lldb/source/Core/FormatEntity.cpp @@ -27,6 +27,7 @@ #include "lldb/Symbol/Symbol.h" #include "lldb/Symbol/SymbolContext.h" #include "lldb/Symbol/VariableList.h" +#include "lldb/Target/BorrowedStackFrame.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/ExecutionContextScope.h" #include "lldb/Target/Language.h" @@ -109,6 +110,7 @@ constexpr Definition g_frame_child_entries[] = { g_string_entry), Definition("is-artificial", EntryType::FrameIsArtificial), Definition("kind", EntryType::FrameKind), + Definition("borrowed-info", EntryType::FrameBorrowedInfo), }; constexpr Definition g_function_child_entries[] = { @@ -382,6 +384,7 @@ const char *FormatEntity::Entry::TypeToCString(Type t) { ENUM_TO_CSTR(FrameRegisterByName); ENUM_TO_CSTR(FrameIsArtificial); ENUM_TO_CSTR(FrameKind); + ENUM_TO_CSTR(FrameBorrowedInfo); ENUM_TO_CSTR(ScriptFrame); ENUM_TO_CSTR(FunctionID); ENUM_TO_CSTR(FunctionDidChange); @@ -1761,6 +1764,22 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, return false; } + case Entry::Type::FrameBorrowedInfo: { + if (exe_ctx) + if (StackFrame *frame = exe_ctx->GetFramePtr()) { + if (BorrowedStackFrame *borrowed_frame = + llvm::dyn_cast(frame)) { + if (lldb::StackFrameSP borrowed_from_sp = + borrowed_frame->GetBorrowedFrame()) { + s.Printf(" [borrowed from frame #%u]", + borrowed_from_sp->GetFrameIndex()); + return true; + } + } + } + return false; + } + case Entry::Type::ScriptFrame: if (exe_ctx) { StackFrame *frame = exe_ctx->GetFramePtr(); diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index f4d1070d0a785..364b2ecadadd4 100644 --- 
a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -879,11 +879,11 @@ static Scalar DerefSizeExtractDataHelper(uint8_t *addr_bytes, return addr_data.GetAddress(&addr_data_offset); } -static llvm::Error Evaluate_DW_OP_deref_size(DWARFExpression::Stack &stack, - ExecutionContext *exe_ctx, - lldb::ModuleSP module_sp, - Process *process, Target *target, - uint8_t size) { +static llvm::Error Evaluate_DW_OP_deref_size( + DWARFExpression::Stack &stack, ExecutionContext *exe_ctx, + lldb::ModuleSP module_sp, Process *process, Target *target, uint8_t size, + size_t size_addr_bytes, + LocationDescriptionKind &dwarf4_location_description_kind) { if (stack.empty()) return llvm::createStringError( "expression stack empty for DW_OP_deref_size"); @@ -892,6 +892,25 @@ static llvm::Error Evaluate_DW_OP_deref_size(DWARFExpression::Stack &stack, return llvm::createStringError( "Invalid address size for DW_OP_deref_size: %d\n", size); + // Deref a register or implicit location and truncate the value to `size` + // bytes. See the corresponding comment in DW_OP_deref for more details on + // why we deref these locations this way. + if (dwarf4_location_description_kind == Register || + dwarf4_location_description_kind == Implicit) { + // Reset context to default values. + dwarf4_location_description_kind = Memory; + stack.back().ClearContext(); + + // Truncate the value on top of the stack to *size* bytes then + // extend to the size of an address (e.g. generic type). + Scalar scalar = stack.back().GetScalar(); + scalar.TruncOrExtendTo(size * 8, /*sign=*/false); + scalar.TruncOrExtendTo(size_addr_bytes * 8, + /*sign=*/false); + stack.back().GetScalar() = scalar; + return llvm::Error::success(); + } + Value::ValueType value_type = stack.back().GetValueType(); switch (value_type) { case Value::ValueType::HostAddress: { @@ -1142,8 +1161,9 @@ llvm::Expected DWARFExpression::Evaluate( // target machine. 
case DW_OP_deref: { size_t size = opcodes.GetAddressByteSize(); - if (llvm::Error err = Evaluate_DW_OP_deref_size(stack, exe_ctx, module_sp, - process, target, size)) + if (llvm::Error err = Evaluate_DW_OP_deref_size( + stack, exe_ctx, module_sp, process, target, size, size, + dwarf4_location_description_kind)) return err; } break; @@ -1161,8 +1181,9 @@ llvm::Expected DWARFExpression::Evaluate( // expression stack. case DW_OP_deref_size: { size_t size = opcodes.GetU8(&offset); - if (llvm::Error err = Evaluate_DW_OP_deref_size(stack, exe_ctx, module_sp, - process, target, size)) + if (llvm::Error err = Evaluate_DW_OP_deref_size( + stack, exe_ctx, module_sp, process, target, size, + opcodes.GetAddressByteSize(), dwarf4_location_description_kind)) return err; } break; diff --git a/lldb/source/Interpreter/ScriptInterpreter.cpp b/lldb/source/Interpreter/ScriptInterpreter.cpp index d2fd372bfe9e3..7bad10ff3ea61 100644 --- a/lldb/source/Interpreter/ScriptInterpreter.cpp +++ b/lldb/source/Interpreter/ScriptInterpreter.cpp @@ -106,6 +106,13 @@ ScriptInterpreter::GetStatusFromSBError(const lldb::SBError &error) const { return Status(); } +lldb::ThreadSP ScriptInterpreter::GetOpaqueTypeFromSBThread( + const lldb::SBThread &thread) const { + if (thread.m_opaque_sp) + return thread.m_opaque_sp->GetThreadSP(); + return nullptr; +} + lldb::StackFrameSP ScriptInterpreter::GetOpaqueTypeFromSBFrame(const lldb::SBFrame &frame) const { if (frame.m_opaque_sp) diff --git a/lldb/source/Plugins/CMakeLists.txt b/lldb/source/Plugins/CMakeLists.txt index 08f444e7b15e8..b6878b21ff71a 100644 --- a/lldb/source/Plugins/CMakeLists.txt +++ b/lldb/source/Plugins/CMakeLists.txt @@ -22,6 +22,7 @@ add_subdirectory(SymbolFile) add_subdirectory(SystemRuntime) add_subdirectory(SymbolLocator) add_subdirectory(SymbolVendor) +add_subdirectory(SyntheticFrameProvider) add_subdirectory(Trace) add_subdirectory(TraceExporter) add_subdirectory(TypeSystem) diff --git 
a/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp b/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp index 6519df9185df0..265bc28a8957f 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedFrame.cpp @@ -7,42 +7,72 @@ //===----------------------------------------------------------------------===// #include "ScriptedFrame.h" - +#include "Plugins/Process/Utility/RegisterContextMemory.h" + +#include "lldb/Core/Address.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" +#include "lldb/Interpreter/ScriptInterpreter.h" +#include "lldb/Symbol/SymbolContext.h" +#include "lldb/Target/ExecutionContext.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Target/Thread.h" #include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/StructuredData.h" using namespace lldb; using namespace lldb_private; +char ScriptedFrame::ID; + void ScriptedFrame::CheckInterpreterAndScriptObject() const { lldbassert(m_script_object_sp && "Invalid Script Object."); lldbassert(GetInterface() && "Invalid Scripted Frame Interface."); } llvm::Expected> -ScriptedFrame::Create(ScriptedThread &thread, +ScriptedFrame::Create(ThreadSP thread_sp, + ScriptedThreadInterfaceSP scripted_thread_interface_sp, StructuredData::DictionarySP args_sp, StructuredData::Generic *script_object) { - if (!thread.IsValid()) - return llvm::createStringError("Invalid scripted thread."); + if (!thread_sp || !thread_sp->IsValid()) + return llvm::createStringError("invalid thread"); + + ProcessSP process_sp = thread_sp->GetProcess(); + if (!process_sp || !process_sp->IsValid()) + return llvm::createStringError("invalid process"); - thread.CheckInterpreterAndScriptObject(); + ScriptInterpreter *script_interp = + 
process_sp->GetTarget().GetDebugger().GetScriptInterpreter(); + if (!script_interp) + return llvm::createStringError("no script interpreter"); - auto scripted_frame_interface = - thread.GetInterface()->CreateScriptedFrameInterface(); + auto scripted_frame_interface = script_interp->CreateScriptedFrameInterface(); if (!scripted_frame_interface) return llvm::createStringError("failed to create scripted frame interface"); llvm::StringRef frame_class_name; if (!script_object) { - std::optional class_name = - thread.GetInterface()->GetScriptedFramePluginName(); - if (!class_name || class_name->empty()) + // If no script object is provided and we have a scripted thread interface, + // try to get the frame class name from it. + if (scripted_thread_interface_sp) { + std::optional class_name = + scripted_thread_interface_sp->GetScriptedFramePluginName(); + if (!class_name || class_name->empty()) + return llvm::createStringError( + "failed to get scripted frame class name"); + frame_class_name = *class_name; + } else { return llvm::createStringError( - "failed to get scripted thread class name"); - frame_class_name = *class_name; + "no script object provided and no scripted thread interface"); + } } - ExecutionContext exe_ctx(thread); + ExecutionContext exe_ctx(thread_sp); auto obj_or_err = scripted_frame_interface->CreatePluginObject( frame_class_name, exe_ctx, args_sp, script_object); @@ -62,7 +92,7 @@ ScriptedFrame::Create(ScriptedThread &thread, SymbolContext sc; Address symbol_addr; if (pc != LLDB_INVALID_ADDRESS) { - symbol_addr.SetLoadAddress(pc, &thread.GetProcess()->GetTarget()); + symbol_addr.SetLoadAddress(pc, &process_sp->GetTarget()); symbol_addr.CalculateSymbolContext(&sc); } @@ -77,11 +107,11 @@ ScriptedFrame::Create(ScriptedThread &thread, if (!reg_info) return llvm::createStringError( - "failed to get scripted thread registers info"); + "failed to get scripted frame registers info"); std::shared_ptr register_info_sp = - DynamicRegisterInfo::Create( - 
*reg_info, thread.GetProcess()->GetTarget().GetArchitecture()); + DynamicRegisterInfo::Create(*reg_info, + process_sp->GetTarget().GetArchitecture()); lldb::RegisterContextSP reg_ctx_sp; @@ -96,32 +126,35 @@ ScriptedFrame::Create(ScriptedThread &thread, std::shared_ptr reg_ctx_memory = std::make_shared( - thread, frame_id, *register_info_sp, LLDB_INVALID_ADDRESS); + *thread_sp, frame_id, *register_info_sp, LLDB_INVALID_ADDRESS); if (!reg_ctx_memory) - return llvm::createStringError("failed to create a register context."); + return llvm::createStringError("failed to create a register context"); reg_ctx_memory->SetAllRegisterData(data_sp); reg_ctx_sp = reg_ctx_memory; } return std::make_shared( - thread, scripted_frame_interface, frame_id, pc, sc, reg_ctx_sp, + thread_sp, scripted_frame_interface, frame_id, pc, sc, reg_ctx_sp, register_info_sp, owned_script_object_sp); } -ScriptedFrame::ScriptedFrame(ScriptedThread &thread, +ScriptedFrame::ScriptedFrame(ThreadSP thread_sp, ScriptedFrameInterfaceSP interface_sp, lldb::user_id_t id, lldb::addr_t pc, SymbolContext &sym_ctx, lldb::RegisterContextSP reg_ctx_sp, std::shared_ptr reg_info_sp, StructuredData::GenericSP script_object_sp) - : StackFrame(thread.shared_from_this(), /*frame_idx=*/id, + : StackFrame(thread_sp, /*frame_idx=*/id, /*concrete_frame_idx=*/id, /*reg_context_sp=*/reg_ctx_sp, /*cfa=*/0, /*pc=*/pc, /*behaves_like_zeroth_frame=*/!id, /*symbol_ctx=*/&sym_ctx), m_scripted_frame_interface_sp(interface_sp), - m_script_object_sp(script_object_sp), m_register_info_sp(reg_info_sp) {} + m_script_object_sp(script_object_sp), m_register_info_sp(reg_info_sp) { + // FIXME: This should be part of the base class constructor. 
+ m_stack_frame_kind = StackFrame::Kind::Synthetic; +} ScriptedFrame::~ScriptedFrame() {} @@ -164,7 +197,7 @@ std::shared_ptr ScriptedFrame::GetDynamicRegisterInfo() { if (!reg_info) return ScriptedInterface::ErrorWithMessage< std::shared_ptr>( - LLVM_PRETTY_FUNCTION, "Failed to get scripted frame registers info.", + LLVM_PRETTY_FUNCTION, "failed to get scripted frame registers info", error, LLDBLog::Thread); ThreadSP thread_sp = m_thread_wp.lock(); @@ -172,7 +205,7 @@ std::shared_ptr ScriptedFrame::GetDynamicRegisterInfo() { return ScriptedInterface::ErrorWithMessage< std::shared_ptr>( LLVM_PRETTY_FUNCTION, - "Failed to get scripted frame registers info: invalid thread.", error, + "failed to get scripted frame registers info: invalid thread", error, LLDBLog::Thread); ProcessSP process_sp = thread_sp->GetProcess(); @@ -180,8 +213,8 @@ std::shared_ptr ScriptedFrame::GetDynamicRegisterInfo() { return ScriptedInterface::ErrorWithMessage< std::shared_ptr>( LLVM_PRETTY_FUNCTION, - "Failed to get scripted frame registers info: invalid process.", - error, LLDBLog::Thread); + "failed to get scripted frame registers info: invalid process", error, + LLDBLog::Thread); m_register_info_sp = DynamicRegisterInfo::Create( *reg_info, process_sp->GetTarget().GetArchitecture()); diff --git a/lldb/source/Plugins/Process/scripted/ScriptedFrame.h b/lldb/source/Plugins/Process/scripted/ScriptedFrame.h index b6b77c4a7d160..d1cbd429d4979 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedFrame.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedFrame.h @@ -10,21 +10,19 @@ #define LLDB_SOURCE_PLUGINS_SCRIPTED_FRAME_H #include "ScriptedThread.h" -#include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Target/DynamicRegisterInfo.h" #include "lldb/Target/StackFrame.h" +#include "lldb/lldb-forward.h" +#include "llvm/Support/Error.h" +#include #include -namespace lldb_private { -class ScriptedThread; -} - namespace lldb_private { class ScriptedFrame : public 
lldb_private::StackFrame { public: - ScriptedFrame(ScriptedThread &thread, + ScriptedFrame(lldb::ThreadSP thread_sp, lldb::ScriptedFrameInterfaceSP interface_sp, lldb::user_id_t frame_idx, lldb::addr_t pc, SymbolContext &sym_ctx, lldb::RegisterContextSP reg_ctx_sp, @@ -33,8 +31,29 @@ class ScriptedFrame : public lldb_private::StackFrame { ~ScriptedFrame() override; + /// Create a ScriptedFrame from an object instantiated in the script + /// interpreter. + /// + /// \param[in] thread_sp + /// The thread this frame belongs to. + /// + /// \param[in] scripted_thread_interface_sp + /// The scripted thread interface (needed for ScriptedThread + /// compatibility). Can be nullptr for frames on real threads. + /// + /// \param[in] args_sp + /// Arguments to pass to the frame creation. + /// + /// \param[in] script_object + /// The optional script object representing this frame. + /// + /// \return + /// An Expected containing the ScriptedFrame shared pointer if successful, + /// otherwise an error. 
static llvm::Expected> - Create(ScriptedThread &thread, StructuredData::DictionarySP args_sp, + Create(lldb::ThreadSP thread_sp, + lldb::ScriptedThreadInterfaceSP scripted_thread_interface_sp, + StructuredData::DictionarySP args_sp, StructuredData::Generic *script_object = nullptr); bool IsInlined() override; @@ -43,6 +62,11 @@ class ScriptedFrame : public lldb_private::StackFrame { const char *GetFunctionName() override; const char *GetDisplayFunctionName() override; + bool isA(const void *ClassID) const override { + return ClassID == &ID || StackFrame::isA(ClassID); + } + static bool classof(const StackFrame *obj) { return obj->isA(&ID); } + private: void CheckInterpreterAndScriptObject() const; lldb::ScriptedFrameInterfaceSP GetInterface() const; @@ -55,6 +79,8 @@ class ScriptedFrame : public lldb_private::StackFrame { lldb::ScriptedFrameInterfaceSP m_scripted_frame_interface_sp; lldb_private::StructuredData::GenericSP m_script_object_sp; std::shared_ptr m_register_info_sp; + + static char ID; }; } // namespace lldb_private diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp index 491efac5aadef..1dd9c48f56a59 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -210,7 +210,7 @@ bool ScriptedThread::LoadArtificialStackFrames() { SymbolContext sc; symbol_addr.CalculateSymbolContext(&sc); - return std::make_shared(this->shared_from_this(), idx, idx, cfa, + return std::make_shared(shared_from_this(), idx, idx, cfa, cfa_is_valid, pc, StackFrame::Kind::Synthetic, artificial, behaves_like_zeroth_frame, &sc); @@ -231,8 +231,8 @@ bool ScriptedThread::LoadArtificialStackFrames() { return error.ToError(); } - auto frame_or_error = - ScriptedFrame::Create(*this, nullptr, object_sp->GetAsGeneric()); + auto frame_or_error = ScriptedFrame::Create( + shared_from_this(), GetInterface(), nullptr, object_sp->GetAsGeneric()); if 
(!frame_or_error) { ScriptedInterface::ErrorWithMessage( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp index d43036d6fe544..f6c707b2bd168 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptInterpreterPythonInterfaces.cpp @@ -31,6 +31,7 @@ void ScriptInterpreterPythonInterfaces::Initialize() { ScriptedStopHookPythonInterface::Initialize(); ScriptedBreakpointPythonInterface::Initialize(); ScriptedThreadPlanPythonInterface::Initialize(); + ScriptedFrameProviderPythonInterface::Initialize(); } void ScriptInterpreterPythonInterfaces::Terminate() { @@ -40,6 +41,7 @@ void ScriptInterpreterPythonInterfaces::Terminate() { ScriptedStopHookPythonInterface::Terminate(); ScriptedBreakpointPythonInterface::Terminate(); ScriptedThreadPlanPythonInterface::Terminate(); + ScriptedFrameProviderPythonInterface::Terminate(); } #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp index b866bf332b7b6..3dde5036453f4 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "lldb/Core/PluginManager.h" #include "lldb/Host/Config.h" #include "lldb/Target/Thread.h" #include "lldb/Utility/Log.h" @@ -30,18 +31,45 @@ ScriptedFrameProviderPythonInterface::ScriptedFrameProviderPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedFrameProviderInterface(), 
ScriptedPythonInterface(interpreter) {} +bool ScriptedFrameProviderPythonInterface::AppliesToThread( + llvm::StringRef class_name, lldb::ThreadSP thread_sp) { + // If there is any issue with this method, we will just assume it also applies + // to this thread which is the default behavior. + constexpr bool fail_value = true; + Status error; + StructuredData::ObjectSP obj = + CallStaticMethod(class_name, "applies_to_thread", error, thread_sp); + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) + return fail_value; + + return obj->GetBooleanValue(fail_value); +} + llvm::Expected ScriptedFrameProviderPythonInterface::CreatePluginObject( const llvm::StringRef class_name, lldb::StackFrameListSP input_frames, StructuredData::DictionarySP args_sp) { if (!input_frames) - return llvm::createStringError("Invalid frame list"); + return llvm::createStringError("invalid frame list"); StructuredDataImpl sd_impl(args_sp); return ScriptedPythonInterface::CreatePluginObject(class_name, nullptr, input_frames, sd_impl); } +std::string ScriptedFrameProviderPythonInterface::GetDescription( + llvm::StringRef class_name) { + Status error; + StructuredData::ObjectSP obj = + CallStaticMethod(class_name, "get_description", error); + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) + return {}; + + return obj->GetStringValue().str(); +} + StructuredData::ObjectSP ScriptedFrameProviderPythonInterface::GetFrameAtIndex(uint32_t index) { Status error; @@ -54,4 +82,32 @@ ScriptedFrameProviderPythonInterface::GetFrameAtIndex(uint32_t index) { return obj; } +bool ScriptedFrameProviderPythonInterface::CreateInstance( + lldb::ScriptLanguage language, ScriptedInterfaceUsages usages) { + if (language != eScriptLanguagePython) + return false; + + return true; +} + +void ScriptedFrameProviderPythonInterface::Initialize() { + const std::vector ci_usages = { + "target frame-provider register -C [-k key -v value ...]", + "target 
frame-provider list", + "target frame-provider remove ", + "target frame-provider clear"}; + const std::vector api_usages = { + "SBTarget.RegisterScriptedFrameProvider", + "SBTarget.RemoveScriptedFrameProvider", + "SBTarget.ClearScriptedFrameProvider"}; + PluginManager::RegisterPlugin( + GetPluginNameStatic(), + llvm::StringRef("Provide scripted stack frames for threads"), + CreateInstance, eScriptLanguagePython, {ci_usages, api_usages}); +} + +void ScriptedFrameProviderPythonInterface::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h index fd163984028d3..97a5cc7c669ea 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedFrameProviderPythonInterface.h @@ -14,17 +14,22 @@ #if LLDB_ENABLE_PYTHON #include "ScriptedPythonInterface.h" +#include "lldb/Core/PluginInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include namespace lldb_private { class ScriptedFrameProviderPythonInterface : public ScriptedFrameProviderInterface, - public ScriptedPythonInterface { + public ScriptedPythonInterface, + public PluginInterface { public: ScriptedFrameProviderPythonInterface( ScriptInterpreterPythonImpl &interpreter); + bool AppliesToThread(llvm::StringRef class_name, + lldb::ThreadSP thread_sp) override; + llvm::Expected CreatePluginObject(llvm::StringRef class_name, lldb::StackFrameListSP input_frames, @@ -33,10 +38,24 @@ class ScriptedFrameProviderPythonInterface llvm::SmallVector GetAbstractMethodRequirements() const override { return llvm::SmallVector( - {{"get_frame_at_index"}}); + {{"get_description"}, {"get_frame_at_index"}}); } + std::string GetDescription(llvm::StringRef class_name) 
override; + StructuredData::ObjectSP GetFrameAtIndex(uint32_t index) override; + + static void Initialize(); + static void Terminate(); + + static bool CreateInstance(lldb::ScriptLanguage language, + ScriptedInterfaceUsages usages); + + static llvm::StringRef GetPluginNameStatic() { + return "ScriptedFrameProviderPythonInterface"; + } + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } }; } // namespace lldb_private diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp index af2e0b5df4d22..ba4473cf9ec4d 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp @@ -93,6 +93,19 @@ ScriptedPythonInterface::ExtractValueFromPythonObject( return nullptr; } +template <> +lldb::ThreadSP +ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error) { + if (lldb::SBThread *sb_thread = reinterpret_cast( + python::LLDBSWIGPython_CastPyObjectToSBThread(p.get()))) + return m_interpreter.GetOpaqueTypeFromSBThread(*sb_thread); + error = Status::FromErrorString( + "Couldn't cast lldb::SBThread to lldb_private::Thread."); + + return nullptr; +} + template <> SymbolContext ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index 23c56610124a6..53a7ba65f64b7 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -387,6 +387,112 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { return m_object_instance_sp; } + /// Call a static method on a 
Python class without creating an instance. + /// + /// This method resolves a Python class by name and calls a static method + /// on it, returning the result. This is useful for calling class-level + /// methods that don't require an instance. + /// + /// \param class_name The fully-qualified name of the Python class. + /// \param method_name The name of the static method to call. + /// \param error Output parameter to receive error information if the call + /// fails. + /// \param args Arguments to pass to the static method. + /// + /// \return The return value of the static method call, or an error value. + template + T CallStaticMethod(llvm::StringRef class_name, llvm::StringRef method_name, + Status &error, Args &&...args) { + using namespace python; + using Locker = ScriptInterpreterPythonImpl::Locker; + + std::string caller_signature = + llvm::Twine(LLVM_PRETTY_FUNCTION + llvm::Twine(" (") + + llvm::Twine(class_name) + llvm::Twine(".") + + llvm::Twine(method_name) + llvm::Twine(")")) + .str(); + + if (class_name.empty()) + return ErrorWithMessage(caller_signature, "missing script class name", + error); + + Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, + Locker::FreeLock); + + // Get the interpreter dictionary. + auto dict = + PythonModule::MainModule().ResolveName( + m_interpreter.GetDictionaryName()); + if (!dict.IsAllocated()) + return ErrorWithMessage( + caller_signature, + llvm::formatv("could not find interpreter dictionary: {0}", + m_interpreter.GetDictionaryName()) + .str(), + error); + + // Resolve the class. + auto class_obj = + PythonObject::ResolveNameWithDictionary( + class_name, dict); + if (!class_obj.IsAllocated()) + return ErrorWithMessage( + caller_signature, + llvm::formatv("could not find script class: {0}", class_name).str(), + error); + + // Get the static method from the class. 
+ if (!class_obj.HasAttribute(method_name)) + return ErrorWithMessage( + caller_signature, + llvm::formatv("class {0} does not have method {1}", class_name, + method_name) + .str(), + error); + + PythonCallable method = + class_obj.GetAttributeValue(method_name).AsType(); + if (!method.IsAllocated()) + return ErrorWithMessage(caller_signature, + llvm::formatv("method {0}.{1} is not callable", + class_name, method_name) + .str(), + error); + + // Transform the arguments. + std::tuple original_args = std::forward_as_tuple(args...); + auto transformed_args = TransformArgs(original_args); + + // Call the static method. + llvm::Expected expected_return_object = + llvm::make_error("Not initialized.", + llvm::inconvertibleErrorCode()); + std::apply( + [&method, &expected_return_object](auto &&...args) { + llvm::consumeError(expected_return_object.takeError()); + expected_return_object = method(args...); + }, + transformed_args); + + if (llvm::Error e = expected_return_object.takeError()) { + error = Status::FromError(std::move(e)); + return ErrorWithMessage( + caller_signature, "python static method could not be called", error); + } + + PythonObject py_return = std::move(expected_return_object.get()); + + // Re-assign reference and pointer arguments if needed. + if (sizeof...(Args) > 0) + if (!ReassignPtrsOrRefsArgs(original_args, transformed_args)) + return ErrorWithMessage( + caller_signature, + "couldn't re-assign reference and pointer arguments", error); + + // Extract value from Python object (handles unallocated case). 
+ return ExtractValueFromPythonObject(py_return, error); + } + protected: template T ExtractValueFromPythonObject(python::PythonObject &p, Status &error) { @@ -403,7 +509,7 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { llvm::Twine(method_name) + llvm::Twine(")")) .str(); if (!m_object_instance_sp) - return ErrorWithMessage(caller_signature, "Python object ill-formed", + return ErrorWithMessage(caller_signature, "python object ill-formed", error); Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, @@ -415,7 +521,7 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { if (!implementor.IsAllocated()) return llvm::is_contained(GetAbstractMethods(), method_name) ? ErrorWithMessage(caller_signature, - "Python implementor not allocated.", + "python implementor not allocated", error) : T{}; @@ -436,20 +542,20 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { if (llvm::Error e = expected_return_object.takeError()) { error = Status::FromError(std::move(e)); return ErrorWithMessage(caller_signature, - "Python method could not be called.", error); + "python method could not be called", error); } PythonObject py_return = std::move(expected_return_object.get()); // Now that we called the python method with the transformed arguments, - // we need to interate again over both the original and transformed + // we need to iterate again over both the original and transformed // parameter pack, and transform back the parameter that were passed in // the original parameter pack as references or pointers. 
if (sizeof...(Args) > 0) if (!ReassignPtrsOrRefsArgs(original_args, transformed_args)) return ErrorWithMessage( caller_signature, - "Couldn't re-assign reference and pointer arguments.", error); + "couldn't re-assign reference and pointer arguments", error); if (!py_return.IsAllocated()) return {}; @@ -655,6 +761,11 @@ lldb::StreamSP ScriptedPythonInterface::ExtractValueFromPythonObject( python::PythonObject &p, Status &error); +template <> +lldb::ThreadSP +ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error); + template <> lldb::StackFrameSP ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 2c971262fc34e..32948ffd30023 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ -265,6 +265,7 @@ void *LLDBSWIGPython_CastPyObjectToSBLaunchInfo(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBError(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBEvent(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data); +void *LLDBSWIGPython_CastPyObjectToSBThread(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBFrame(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBSymbolContext(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data); diff --git a/lldb/source/Plugins/SyntheticFrameProvider/CMakeLists.txt b/lldb/source/Plugins/SyntheticFrameProvider/CMakeLists.txt new file mode 100644 index 0000000000000..85b405e648c1f --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(ScriptedFrameProvider) diff --git a/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/CMakeLists.txt b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/CMakeLists.txt new file mode 
100644 index 0000000000000..fe67d39efdf11 --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/CMakeLists.txt @@ -0,0 +1,12 @@ +add_lldb_library(lldbPluginScriptedFrameProvider PLUGIN + ScriptedFrameProvider.cpp + + LINK_COMPONENTS + Support + + LINK_LIBS + lldbCore + lldbInterpreter + lldbTarget + lldbUtility + ) diff --git a/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.cpp b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.cpp new file mode 100644 index 0000000000000..739963e6f0c2f --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.cpp @@ -0,0 +1,221 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ScriptedFrameProvider.h" +#include "Plugins/Process/scripted/ScriptedFrame.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" +#include "lldb/Interpreter/ScriptInterpreter.h" +#include "lldb/Target/BorrowedStackFrame.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/StackFrame.h" +#include "lldb/Target/Thread.h" +#include "lldb/Utility/ScriptedMetadata.h" +#include "lldb/Utility/Status.h" +#include "llvm/Support/Error.h" +#include + +using namespace lldb; +using namespace lldb_private; + +void ScriptedFrameProvider::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + "Provides synthetic frames via scripting", + nullptr, ScriptedFrameProvider::CreateInstance); +} + +void ScriptedFrameProvider::Terminate() { + 
PluginManager::UnregisterPlugin(ScriptedFrameProvider::CreateInstance); +} + +llvm::Expected +ScriptedFrameProvider::CreateInstance( + lldb::StackFrameListSP input_frames, + const ScriptedFrameProviderDescriptor &descriptor) { + if (!input_frames) + return llvm::createStringError( + "failed to create scripted frame provider: invalid input frames"); + + Thread &thread = input_frames->GetThread(); + ProcessSP process_sp = thread.GetProcess(); + if (!process_sp) + return nullptr; + + if (!descriptor.IsValid()) + return llvm::createStringError( + "failed to create scripted frame provider: invalid scripted metadata"); + + if (!descriptor.AppliesToThread(thread)) + return nullptr; + + ScriptInterpreter *script_interp = + process_sp->GetTarget().GetDebugger().GetScriptInterpreter(); + if (!script_interp) + return llvm::createStringError("cannot create scripted frame provider: no " + "script interpreter installed"); + + ScriptedFrameProviderInterfaceSP interface_sp = + script_interp->CreateScriptedFrameProviderInterface(); + if (!interface_sp) + return llvm::createStringError( + "cannot create scripted frame provider: script interpreter couldn't " + "create Scripted Frame Provider Interface"); + + const ScriptedMetadataSP scripted_metadata = descriptor.scripted_metadata_sp; + + // If we shouldn't attach a frame provider to this thread, just exit early. 
+ if (!interface_sp->AppliesToThread(scripted_metadata->GetClassName(), + thread.shared_from_this())) + return nullptr; + + auto obj_or_err = interface_sp->CreatePluginObject( + scripted_metadata->GetClassName(), input_frames, + scripted_metadata->GetArgsSP()); + if (!obj_or_err) + return obj_or_err.takeError(); + + StructuredData::ObjectSP object_sp = *obj_or_err; + if (!object_sp || !object_sp->IsValid()) + return llvm::createStringError( + "cannot create scripted frame provider: failed to create valid scripted " + "frame provider object"); + + return std::make_shared(input_frames, interface_sp, + descriptor); +} + +ScriptedFrameProvider::ScriptedFrameProvider( + StackFrameListSP input_frames, + lldb::ScriptedFrameProviderInterfaceSP interface_sp, + const ScriptedFrameProviderDescriptor &descriptor) + : SyntheticFrameProvider(input_frames), m_interface_sp(interface_sp), + m_descriptor(descriptor) {} + +ScriptedFrameProvider::~ScriptedFrameProvider() = default; + +std::string ScriptedFrameProvider::GetDescription() const { + if (!m_interface_sp) + return {}; + + return m_interface_sp->GetDescription(m_descriptor.GetName()); +} + +llvm::Expected +ScriptedFrameProvider::GetFrameAtIndex(uint32_t idx) { + if (!m_interface_sp) + return llvm::createStringError( + "cannot get stack frame: scripted frame provider not initialized"); + + auto create_frame_from_dict = + [this](StructuredData::Dictionary *dict, + uint32_t index) -> llvm::Expected { + lldb::addr_t pc; + if (!dict->GetValueForKeyAsInteger("pc", pc)) + return llvm::createStringError( + "missing 'pc' key from scripted frame dictionary"); + + Address symbol_addr; + symbol_addr.SetLoadAddress(pc, &GetThread().GetProcess()->GetTarget()); + + const lldb::addr_t cfa = LLDB_INVALID_ADDRESS; + const bool cfa_is_valid = false; + const bool artificial = false; + const bool behaves_like_zeroth_frame = false; + SymbolContext sc; + symbol_addr.CalculateSymbolContext(&sc); + + ThreadSP thread_sp = 
GetThread().shared_from_this(); + return std::make_shared(thread_sp, index, index, cfa, + cfa_is_valid, pc, + StackFrame::Kind::Synthetic, artificial, + behaves_like_zeroth_frame, &sc); + }; + + auto create_frame_from_script_object = + [this]( + StructuredData::ObjectSP object_sp) -> llvm::Expected { + Status error; + if (!object_sp || !object_sp->GetAsGeneric()) + return llvm::createStringError("invalid script object"); + + ThreadSP thread_sp = GetThread().shared_from_this(); + auto frame_or_error = ScriptedFrame::Create(thread_sp, nullptr, nullptr, + object_sp->GetAsGeneric()); + + if (!frame_or_error) { + ScriptedInterface::ErrorWithMessage( + LLVM_PRETTY_FUNCTION, toString(frame_or_error.takeError()), error); + return error.ToError(); + } + + return *frame_or_error; + }; + + StructuredData::ObjectSP obj_sp = m_interface_sp->GetFrameAtIndex(idx); + + // None/null means no more frames or error. + if (!obj_sp || !obj_sp->IsValid()) + return llvm::createStringError("invalid script object returned for frame " + + llvm::Twine(idx)); + + StackFrameSP synth_frame_sp = nullptr; + if (StructuredData::UnsignedInteger *int_obj = + obj_sp->GetAsUnsignedInteger()) { + uint32_t real_frame_index = int_obj->GetValue(); + if (real_frame_index < m_input_frames->GetNumFrames()) { + StackFrameSP real_frame_sp = + m_input_frames->GetFrameAtIndex(real_frame_index); + synth_frame_sp = + (real_frame_index == idx) + ? real_frame_sp + : std::make_shared(real_frame_sp, idx); + } + } else if (StructuredData::Dictionary *dict = obj_sp->GetAsDictionary()) { + // Check if it's a dictionary describing a frame. 
+ auto frame_from_dict_or_err = create_frame_from_dict(dict, idx); + if (!frame_from_dict_or_err) { + return llvm::createStringError(llvm::Twine( + "couldn't create frame from dictionary at index " + llvm::Twine(idx) + + ": " + toString(frame_from_dict_or_err.takeError()))); + } + synth_frame_sp = *frame_from_dict_or_err; + } else if (obj_sp->GetAsGeneric()) { + // It's a ScriptedFrame object. + auto frame_from_script_obj_or_err = create_frame_from_script_object(obj_sp); + if (!frame_from_script_obj_or_err) { + return llvm::createStringError( + llvm::Twine("couldn't create frame from script object at index " + + llvm::Twine(idx) + ": " + + toString(frame_from_script_obj_or_err.takeError()))); + } + synth_frame_sp = *frame_from_script_obj_or_err; + } else { + return llvm::createStringError( + llvm::Twine("invalid return type from get_frame_at_index at index " + + llvm::Twine(idx))); + } + + if (!synth_frame_sp) + return llvm::createStringError( + llvm::Twine("failed to create frame at index " + llvm::Twine(idx))); + + synth_frame_sp->SetFrameIndex(idx); + + return synth_frame_sp; +} + +namespace lldb_private { +void lldb_initialize_ScriptedFrameProvider() { + ScriptedFrameProvider::Initialize(); +} + +void lldb_terminate_ScriptedFrameProvider() { + ScriptedFrameProvider::Terminate(); +} +} // namespace lldb_private diff --git a/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.h b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.h new file mode 100644 index 0000000000000..3434bf26ade24 --- /dev/null +++ b/lldb/source/Plugins/SyntheticFrameProvider/ScriptedFrameProvider/ScriptedFrameProvider.h @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_PLUGINS_SYNTHETICFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_H +#define LLDB_PLUGINS_SYNTHETICFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_H + +#include "lldb/Target/SyntheticFrameProvider.h" +#include "lldb/Utility/ScriptedMetadata.h" +#include "lldb/Utility/Status.h" +#include "lldb/lldb-forward.h" +#include "llvm/Support/Error.h" + +namespace lldb_private { + +class ScriptedFrameProvider : public SyntheticFrameProvider { +public: + static llvm::StringRef GetPluginNameStatic() { + return "ScriptedFrameProvider"; + } + + static llvm::Expected + CreateInstance(lldb::StackFrameListSP input_frames, + const ScriptedFrameProviderDescriptor &descriptor); + + static void Initialize(); + + static void Terminate(); + + ScriptedFrameProvider(lldb::StackFrameListSP input_frames, + lldb::ScriptedFrameProviderInterfaceSP interface_sp, + const ScriptedFrameProviderDescriptor &descriptor); + ~ScriptedFrameProvider() override; + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + std::string GetDescription() const override; + + /// Get a single stack frame at the specified index. 
+ llvm::Expected GetFrameAtIndex(uint32_t idx) override; + +private: + lldb::ScriptedFrameProviderInterfaceSP m_interface_sp; + const ScriptedFrameProviderDescriptor &m_descriptor; +}; + +} // namespace lldb_private + +#endif // LLDB_PLUGINS_SYNTHETICFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_SCRIPTEDFRAMEPROVIDER_H diff --git a/lldb/source/Target/BorrowedStackFrame.cpp b/lldb/source/Target/BorrowedStackFrame.cpp new file mode 100644 index 0000000000000..5afadf21fde03 --- /dev/null +++ b/lldb/source/Target/BorrowedStackFrame.cpp @@ -0,0 +1,187 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Target/BorrowedStackFrame.h" + +using namespace lldb; +using namespace lldb_private; + +char BorrowedStackFrame::ID; + +BorrowedStackFrame::BorrowedStackFrame( + StackFrameSP borrowed_frame_sp, uint32_t new_frame_index, + std::optional new_concrete_frame_index) + : StackFrame( + borrowed_frame_sp->GetThread(), new_frame_index, + borrowed_frame_sp->GetConcreteFrameIndex(), + borrowed_frame_sp->GetRegisterContextSP(), + borrowed_frame_sp->GetStackID().GetPC(), + borrowed_frame_sp->GetStackID().GetCallFrameAddressWithoutMetadata(), + borrowed_frame_sp->m_behaves_like_zeroth_frame, + &borrowed_frame_sp->GetSymbolContext(eSymbolContextEverything)), + m_borrowed_frame_sp(borrowed_frame_sp), + m_new_frame_index(new_frame_index) { + if (new_concrete_frame_index) + m_new_concrete_frame_index = *new_concrete_frame_index; + else + m_new_concrete_frame_index = + IsInlined() ? 
LLDB_INVALID_FRAME_ID : new_frame_index; +} + +uint32_t BorrowedStackFrame::GetFrameIndex() const { return m_new_frame_index; } + +void BorrowedStackFrame::SetFrameIndex(uint32_t index) { + m_new_frame_index = index; +} + +uint32_t BorrowedStackFrame::GetConcreteFrameIndex() { + // FIXME: We need to find where the concrete frame into which this frame was + // inlined landed in the new stack frame list as that is the correct concrete + // frame index in this + // stack frame. + return m_new_concrete_frame_index; +} + +StackID &BorrowedStackFrame::GetStackID() { + return m_borrowed_frame_sp->GetStackID(); +} + +const Address &BorrowedStackFrame::GetFrameCodeAddress() { + return m_borrowed_frame_sp->GetFrameCodeAddress(); +} + +Address BorrowedStackFrame::GetFrameCodeAddressForSymbolication() { + return m_borrowed_frame_sp->GetFrameCodeAddressForSymbolication(); +} + +bool BorrowedStackFrame::ChangePC(addr_t pc) { + return m_borrowed_frame_sp->ChangePC(pc); +} + +const SymbolContext & +BorrowedStackFrame::GetSymbolContext(SymbolContextItem resolve_scope) { + return m_borrowed_frame_sp->GetSymbolContext(resolve_scope); +} + +llvm::Error BorrowedStackFrame::GetFrameBaseValue(Scalar &value) { + return m_borrowed_frame_sp->GetFrameBaseValue(value); +} + +DWARFExpressionList * +BorrowedStackFrame::GetFrameBaseExpression(Status *error_ptr) { + return m_borrowed_frame_sp->GetFrameBaseExpression(error_ptr); +} + +Block *BorrowedStackFrame::GetFrameBlock() { + return m_borrowed_frame_sp->GetFrameBlock(); +} + +RegisterContextSP BorrowedStackFrame::GetRegisterContext() { + return m_borrowed_frame_sp->GetRegisterContext(); +} + +VariableList *BorrowedStackFrame::GetVariableList(bool get_file_globals, + Status *error_ptr) { + return m_borrowed_frame_sp->GetVariableList(get_file_globals, error_ptr); +} + +VariableListSP +BorrowedStackFrame::GetInScopeVariableList(bool get_file_globals, + bool must_have_valid_location) { + return 
m_borrowed_frame_sp->GetInScopeVariableList(get_file_globals, + must_have_valid_location); +} + +ValueObjectSP BorrowedStackFrame::GetValueForVariableExpressionPath( + llvm::StringRef var_expr, DynamicValueType use_dynamic, uint32_t options, + VariableSP &var_sp, Status &error) { + return m_borrowed_frame_sp->GetValueForVariableExpressionPath( + var_expr, use_dynamic, options, var_sp, error); +} + +bool BorrowedStackFrame::HasDebugInformation() { + return m_borrowed_frame_sp->HasDebugInformation(); +} + +const char *BorrowedStackFrame::Disassemble() { + return m_borrowed_frame_sp->Disassemble(); +} + +ValueObjectSP BorrowedStackFrame::GetValueObjectForFrameVariable( + const VariableSP &variable_sp, DynamicValueType use_dynamic) { + return m_borrowed_frame_sp->GetValueObjectForFrameVariable(variable_sp, + use_dynamic); +} + +bool BorrowedStackFrame::IsInlined() { + return m_borrowed_frame_sp->IsInlined(); +} + +bool BorrowedStackFrame::IsSynthetic() const { + return m_borrowed_frame_sp->IsSynthetic(); +} + +bool BorrowedStackFrame::IsHistorical() const { + return m_borrowed_frame_sp->IsHistorical(); +} + +bool BorrowedStackFrame::IsArtificial() const { + return m_borrowed_frame_sp->IsArtificial(); +} + +bool BorrowedStackFrame::IsHidden() { return m_borrowed_frame_sp->IsHidden(); } + +const char *BorrowedStackFrame::GetFunctionName() { + return m_borrowed_frame_sp->GetFunctionName(); +} + +const char *BorrowedStackFrame::GetDisplayFunctionName() { + return m_borrowed_frame_sp->GetDisplayFunctionName(); +} + +ValueObjectSP BorrowedStackFrame::FindVariable(ConstString name) { + return m_borrowed_frame_sp->FindVariable(name); +} + +SourceLanguage BorrowedStackFrame::GetLanguage() { + return m_borrowed_frame_sp->GetLanguage(); +} + +SourceLanguage BorrowedStackFrame::GuessLanguage() { + return m_borrowed_frame_sp->GuessLanguage(); +} + +ValueObjectSP BorrowedStackFrame::GuessValueForAddress(addr_t addr) { + return m_borrowed_frame_sp->GuessValueForAddress(addr); +} + 
+ValueObjectSP +BorrowedStackFrame::GuessValueForRegisterAndOffset(ConstString reg, + int64_t offset) { + return m_borrowed_frame_sp->GuessValueForRegisterAndOffset(reg, offset); +} + +StructuredData::ObjectSP BorrowedStackFrame::GetLanguageSpecificData() { + return m_borrowed_frame_sp->GetLanguageSpecificData(); +} + +RecognizedStackFrameSP BorrowedStackFrame::GetRecognizedFrame() { + return m_borrowed_frame_sp->GetRecognizedFrame(); +} + +StackFrameSP BorrowedStackFrame::GetBorrowedFrame() const { + return m_borrowed_frame_sp; +} + +bool BorrowedStackFrame::isA(const void *ClassID) const { + return ClassID == &ID || StackFrame::isA(ClassID); +} + +bool BorrowedStackFrame::classof(const StackFrame *obj) { + return obj->isA(&ID); +} diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt index cff59049cdce5..df2ee03860ac0 100644 --- a/lldb/source/Target/CMakeLists.txt +++ b/lldb/source/Target/CMakeLists.txt @@ -41,6 +41,7 @@ add_lldb_library(lldbTarget SyntheticFrameProvider.cpp SectionLoadHistory.cpp SectionLoadList.cpp + BorrowedStackFrame.cpp StackFrame.cpp StackFrameList.cpp StackFrameRecognizer.cpp diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp index 78f67d21d6600..ca3d4a1a29b59 100644 --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -45,6 +45,9 @@ using namespace lldb; using namespace lldb_private; +// LLVM RTTI support. +char StackFrame::ID; + // The first bits in the flags are reserved for the SymbolContext::Scope bits // so we know if we have tried to look up information in our internal symbol // context (m_sc) already. 
diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 8412e33aaba32..5d1a8a8370414 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -20,6 +20,7 @@ #include "lldb/Target/StackFrame.h" #include "lldb/Target/StackFrameRecognizer.h" #include "lldb/Target/StopInfo.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/Target.h" #include "lldb/Target/Thread.h" #include "lldb/Target/Unwind.h" @@ -55,6 +56,44 @@ StackFrameList::~StackFrameList() { Clear(); } +SyntheticStackFrameList::SyntheticStackFrameList( + Thread &thread, lldb::StackFrameListSP input_frames, + const lldb::StackFrameListSP &prev_frames_sp, bool show_inline_frames) + : StackFrameList(thread, prev_frames_sp, show_inline_frames), + m_input_frames(std::move(input_frames)) {} + +bool SyntheticStackFrameList::FetchFramesUpTo( + uint32_t end_idx, InterruptionControl allow_interrupt) { + // Check if the thread has a synthetic frame provider. + if (auto provider_sp = m_thread.GetFrameProvider()) { + // Use the synthetic frame provider to generate frames lazily. + // Keep fetching until we reach end_idx or the provider returns an error. + for (uint32_t idx = m_frames.size(); idx <= end_idx; idx++) { + if (allow_interrupt && + m_thread.GetProcess()->GetTarget().GetDebugger().InterruptRequested()) + return true; + auto frame_or_err = provider_sp->GetFrameAtIndex(idx); + if (!frame_or_err) { + // Provider returned error - we've reached the end. + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), frame_or_err.takeError(), + "Frame provider reached end at index {0}: {1}", idx); + SetAllFramesFetched(); + break; + } + StackFrameSP frame_sp = *frame_or_err; + // Set the frame list weak pointer so ExecutionContextRef can resolve + // the frame without calling Thread::GetStackFrameList(). + frame_sp->m_frame_list_wp = shared_from_this(); + m_frames.push_back(frame_sp); + } + + return false; // Not interrupted. 
+ } + + // If no provider, fall back to the base implementation. + return StackFrameList::FetchFramesUpTo(end_idx, allow_interrupt); +} + void StackFrameList::CalculateCurrentInlinedDepth() { uint32_t cur_inlined_depth = GetCurrentInlinedDepth(); if (cur_inlined_depth == UINT32_MAX) { diff --git a/lldb/source/Target/SyntheticFrameProvider.cpp b/lldb/source/Target/SyntheticFrameProvider.cpp index 241ce82c39be3..97ff42d1ed53e 100644 --- a/lldb/source/Target/SyntheticFrameProvider.cpp +++ b/lldb/source/Target/SyntheticFrameProvider.cpp @@ -8,10 +8,12 @@ #include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Core/PluginManager.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include "lldb/Target/Thread.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Status.h" +#include "lldb/Utility/Stream.h" using namespace lldb; using namespace lldb_private; @@ -21,12 +23,17 @@ SyntheticFrameProvider::SyntheticFrameProvider(StackFrameListSP input_frames) SyntheticFrameProvider::~SyntheticFrameProvider() = default; -void SyntheticFrameProviderDescriptor::Dump(Stream *s) const { +void ScriptedFrameProviderDescriptor::Dump(Stream *s) const { if (!s) return; + s->Format(" ID: {0:x}\n", GetID()); s->Printf(" Name: %s\n", GetName().str().c_str()); + std::string description = GetDescription(); + if (!description.empty()) + s->Printf(" Description: %s\n", description.c_str()); + // Show thread filter information. if (thread_specs.empty()) { s->PutCString(" Thread Filter: (applies to all threads)\n"); @@ -41,9 +48,23 @@ void SyntheticFrameProviderDescriptor::Dump(Stream *s) const { } } +uint32_t ScriptedFrameProviderDescriptor::GetID() const { + if (!scripted_metadata_sp) + return 0; + + return scripted_metadata_sp->GetID(); +} + +std::string ScriptedFrameProviderDescriptor::GetDescription() const { + // If we have an interface, call get_description() to fetch it. 
+ if (interface_sp && scripted_metadata_sp) + return interface_sp->GetDescription(scripted_metadata_sp->GetClassName()); + return {}; +} + llvm::Expected SyntheticFrameProvider::CreateInstance( StackFrameListSP input_frames, - const SyntheticFrameProviderDescriptor &descriptor) { + const ScriptedFrameProviderDescriptor &descriptor) { if (!input_frames) return llvm::createStringError( "cannot create synthetic frame provider: invalid input frames"); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 3a936b85f6339..b6a662ad3f14d 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3720,6 +3720,61 @@ Status Target::Attach(ProcessAttachInfo &attach_info, Stream *stream) { return error; } +llvm::Expected Target::AddScriptedFrameProviderDescriptor( + const ScriptedFrameProviderDescriptor &descriptor) { + if (!descriptor.IsValid()) + return llvm::createStringError("invalid frame provider descriptor"); + + llvm::StringRef name = descriptor.GetName(); + if (name.empty()) + return llvm::createStringError( + "frame provider descriptor has no class name"); + + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + + uint32_t descriptor_id = descriptor.GetID(); + m_frame_provider_descriptors[descriptor_id] = descriptor; + + // Clear frame providers on existing threads so they reload with new config. 
+ if (ProcessSP process_sp = GetProcessSP()) + for (ThreadSP thread_sp : process_sp->Threads()) + thread_sp->ClearScriptedFrameProvider(); + + return descriptor_id; +} + +bool Target::RemoveScriptedFrameProviderDescriptor(uint32_t id) { + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + bool removed = m_frame_provider_descriptors.erase(id); + + if (removed) + if (ProcessSP process_sp = GetProcessSP()) + for (ThreadSP thread_sp : process_sp->Threads()) + thread_sp->ClearScriptedFrameProvider(); + + return removed; +} + +void Target::ClearScriptedFrameProviderDescriptors() { + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + + m_frame_provider_descriptors.clear(); + + if (ProcessSP process_sp = GetProcessSP()) + for (ThreadSP thread_sp : process_sp->Threads()) + thread_sp->ClearScriptedFrameProvider(); +} + +const llvm::DenseMap & +Target::GetScriptedFrameProviderDescriptors() const { + std::lock_guard guard( + m_frame_provider_descriptors_mutex); + return m_frame_provider_descriptors; +} + void Target::FinalizeFileActions(ProcessLaunchInfo &info) { Log *log = GetLog(LLDBLog::Process); diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 8c3e19725f8cb..b40e753aca1e9 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -13,9 +13,12 @@ #include "lldb/Core/Module.h" #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Host/Host.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedFrameProviderInterface.h" #include "lldb/Interpreter/OptionValueFileSpecList.h" #include "lldb/Interpreter/OptionValueProperties.h" #include "lldb/Interpreter/Property.h" +#include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Symbol/Function.h" #include "lldb/Target/ABI.h" #include "lldb/Target/DynamicLoader.h" @@ -26,6 +29,7 @@ #include "lldb/Target/ScriptedThreadPlan.h" #include "lldb/Target/StackFrameRecognizer.h" #include 
"lldb/Target/StopInfo.h" +#include "lldb/Target/SyntheticFrameProvider.h" #include "lldb/Target/SystemRuntime.h" #include "lldb/Target/Target.h" #include "lldb/Target/ThreadPlan.h" @@ -45,6 +49,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/RegularExpression.h" +#include "lldb/Utility/ScriptedMetadata.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" @@ -257,6 +262,7 @@ void Thread::DestroyThread() { std::lock_guard guard(m_frame_mutex); m_curr_frames_sp.reset(); m_prev_frames_sp.reset(); + m_frame_provider_sp.reset(); m_prev_framezero_pc.reset(); } @@ -1439,13 +1445,76 @@ void Thread::CalculateExecutionContext(ExecutionContext &exe_ctx) { StackFrameListSP Thread::GetStackFrameList() { std::lock_guard guard(m_frame_mutex); - if (!m_curr_frames_sp) + if (m_curr_frames_sp) + return m_curr_frames_sp; + + // First, try to load a frame provider if we don't have one yet. + if (!m_frame_provider_sp) { + ProcessSP process_sp = GetProcess(); + if (process_sp) { + Target &target = process_sp->GetTarget(); + const auto &descriptors = target.GetScriptedFrameProviderDescriptors(); + + // Find first descriptor that applies to this thread. + for (const auto &entry : descriptors) { + const ScriptedFrameProviderDescriptor &descriptor = entry.second; + if (descriptor.IsValid() && descriptor.AppliesToThread(*this)) { + if (llvm::Error error = LoadScriptedFrameProvider(descriptor)) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), std::move(error), + "Failed to load scripted frame provider: {0}"); + } + break; // Use first matching descriptor (success or failure). + } + } + } + } + + // Create the frame list based on whether we have a provider. + if (m_frame_provider_sp) { + // We have a provider - create synthetic frame list. 
+ StackFrameListSP input_frames = m_frame_provider_sp->GetInputFrames(); + m_curr_frames_sp = std::make_shared( + *this, input_frames, m_prev_frames_sp, true); + } else { + // No provider - use normal unwinder frames. m_curr_frames_sp = std::make_shared(*this, m_prev_frames_sp, true); + } return m_curr_frames_sp; } +llvm::Error Thread::LoadScriptedFrameProvider( + const ScriptedFrameProviderDescriptor &descriptor) { + std::lock_guard guard(m_frame_mutex); + + // Note: We don't create input_frames here - it will be created lazily + // by SyntheticStackFrameList when frames are first fetched. + // Creating them too early can cause crashes during thread initialization. + + // Create a temporary StackFrameList just to get the thread reference for the + // provider. The provider won't actually use this - it will get real input + // frames from SyntheticStackFrameList later. + StackFrameListSP temp_frames = + std::make_shared(*this, m_prev_frames_sp, true); + + auto provider_or_err = + SyntheticFrameProvider::CreateInstance(temp_frames, descriptor); + if (!provider_or_err) + return provider_or_err.takeError(); + + ClearScriptedFrameProvider(); + m_frame_provider_sp = *provider_or_err; + return llvm::Error::success(); +} + +void Thread::ClearScriptedFrameProvider() { + std::lock_guard guard(m_frame_mutex); + m_frame_provider_sp.reset(); + m_curr_frames_sp.reset(); + m_prev_frames_sp.reset(); +} + std::optional Thread::GetPreviousFrameZeroPC() { return m_prev_framezero_pc; } @@ -1466,6 +1535,7 @@ void Thread::ClearStackFrames() { m_prev_frames_sp.swap(m_curr_frames_sp); m_curr_frames_sp.reset(); + m_frame_provider_sp.reset(); m_extended_info.reset(); m_extended_info_fetched = false; } diff --git a/lldb/source/Target/ThreadSpec.cpp b/lldb/source/Target/ThreadSpec.cpp index ba4c3aa894553..624f64e3af800 100644 --- a/lldb/source/Target/ThreadSpec.cpp +++ b/lldb/source/Target/ThreadSpec.cpp @@ -19,6 +19,10 @@ const char *ThreadSpec::g_option_names[static_cast( 
ThreadSpec::ThreadSpec() : m_name(), m_queue_name() {} +ThreadSpec::ThreadSpec(Thread &thread) + : m_index(thread.GetIndexID()), m_tid(thread.GetID()), + m_name(thread.GetName()), m_queue_name(thread.GetQueueName()) {} + std::unique_ptr ThreadSpec::CreateFromStructuredData( const StructuredData::Dictionary &spec_dict, Status &error) { uint32_t index = UINT32_MAX; diff --git a/lldb/source/ValueObject/ValueObjectSynthetic.cpp b/lldb/source/ValueObject/ValueObjectSynthetic.cpp index f673c51a88412..44e53bd5fd82e 100644 --- a/lldb/source/ValueObject/ValueObjectSynthetic.cpp +++ b/lldb/source/ValueObject/ValueObjectSynthetic.cpp @@ -443,3 +443,18 @@ void ValueObjectSynthetic::SetLanguageFlags(uint64_t flags) { else this->ValueObject::SetLanguageFlags(flags); } + +void ValueObjectSynthetic::GetExpressionPath(Stream &stream, + GetExpressionPathFormat epformat) { + // A synthetic ValueObject may wrap an underlying Register or RegisterSet + // ValueObject, which requires a different approach to generating the + // expression path. In such cases, delegate to the non-synthetic value object. 
+ if (const lldb::ValueType obj_value_type = GetValueType(); + IsSynthetic() && (obj_value_type == lldb::eValueTypeRegister || + obj_value_type == lldb::eValueTypeRegisterSet)) { + + if (const lldb::ValueObjectSP raw_value = GetNonSyntheticValue()) + return raw_value->GetExpressionPath(stream, epformat); + } + return ValueObject::GetExpressionPath(stream, epformat); +} diff --git a/lldb/test/API/functionalities/scripted_frame_provider/Makefile b/lldb/test/API/functionalities/scripted_frame_provider/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/functionalities/scripted_frame_provider/TestScriptedFrameProvider.py b/lldb/test/API/functionalities/scripted_frame_provider/TestScriptedFrameProvider.py new file mode 100644 index 0000000000000..3c0390ef72fd2 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/TestScriptedFrameProvider.py @@ -0,0 +1,418 @@ +""" +Test scripted frame provider functionality. 
+""" + +import os + +import lldb +from lldbsuite.test.lldbtest import TestBase +from lldbsuite.test import lldbutil + + +class ScriptedFrameProviderTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + self.source = "main.cpp" + + def test_replace_all_frames(self): + """Test that we can replace the entire stack.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Import the test frame provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Attach the Replace provider + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.ReplaceFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have exactly 3 synthetic frames + self.assertEqual(thread.GetNumFrames(), 3, "Should have 3 synthetic frames") + + # Verify frame indices and PCs (dictionary-based frames don't have custom function names) + frame0 = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0) + self.assertEqual(frame0.GetPC(), 0x1000) + + frame1 = thread.GetFrameAtIndex(1) + self.assertIsNotNone(frame1) + self.assertIn("thread_func", frame1.GetFunctionName()) + + frame2 = thread.GetFrameAtIndex(2) + self.assertIsNotNone(frame2) + self.assertEqual(frame2.GetPC(), 0x3000) + + def test_prepend_frames(self): + """Test that we can add frames before real stack.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Get original frame count and PC + original_frame_count = thread.GetNumFrames() + self.assertGreaterEqual( + original_frame_count, 
2, "Should have at least 2 real frames" + ) + + # Import and attach Prepend provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.PrependFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have 2 more frames + new_frame_count = thread.GetNumFrames() + self.assertEqual(new_frame_count, original_frame_count + 2) + + # Verify first 2 frames are synthetic (check PCs, not function names) + frame0 = thread.GetFrameAtIndex(0) + self.assertEqual(frame0.GetPC(), 0x9000) + + frame1 = thread.GetFrameAtIndex(1) + self.assertEqual(frame1.GetPC(), 0xA000) + + # Verify frame 2 is the original real frame 0 + frame2 = thread.GetFrameAtIndex(2) + self.assertIn("thread_func", frame2.GetFunctionName()) + + def test_append_frames(self): + """Test that we can add frames after real stack.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Get original frame count + original_frame_count = thread.GetNumFrames() + + # Import and attach Append provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.AppendFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have 1 more frame + new_frame_count = thread.GetNumFrames() + self.assertEqual(new_frame_count, original_frame_count + 
1) + + # Verify first frames are still real + frame0 = thread.GetFrameAtIndex(0) + self.assertIn("thread_func", frame0.GetFunctionName()) + + frame_n_plus_1 = thread.GetFrameAtIndex(new_frame_count - 1) + self.assertEqual(frame_n_plus_1.GetPC(), 0x10) + + def test_scripted_frame_objects(self): + """Test that provider can return ScriptedFrame objects.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Import the provider that returns ScriptedFrame objects + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.ScriptedFrameObjectProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify we have 5 frames + self.assertEqual( + thread.GetNumFrames(), 5, "Should have 5 custom scripted frames" + ) + + # Verify frame properties from CustomScriptedFrame + frame0 = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0) + self.assertEqual(frame0.GetFunctionName(), "custom_scripted_frame_0") + self.assertEqual(frame0.GetPC(), 0x5000) + self.assertTrue(frame0.IsSynthetic(), "Frame should be marked as synthetic") + + frame1 = thread.GetFrameAtIndex(1) + self.assertIsNotNone(frame1) + self.assertEqual(frame1.GetPC(), 0x6000) + + frame2 = thread.GetFrameAtIndex(2) + self.assertIsNotNone(frame2) + self.assertEqual(frame2.GetFunctionName(), "custom_scripted_frame_2") + self.assertEqual(frame2.GetPC(), 0x7000) + self.assertTrue(frame2.IsSynthetic(), "Frame should be marked as synthetic") + + def test_applies_to_thread(self): + """Test that applies_to_thread filters which threads get the provider.""" + self.build() + target, process, 
thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # We should have at least 2 threads (worker threads) at the breakpoint + num_threads = process.GetNumThreads() + self.assertGreaterEqual( + num_threads, 2, "Should have at least 2 threads at breakpoint" + ) + + # Import the test frame provider + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Collect original thread info before applying provider + thread_info = {} + for i in range(num_threads): + t = process.GetThreadAtIndex(i) + thread_info[t.GetIndexID()] = { + "frame_count": t.GetNumFrames(), + "pc": t.GetFrameAtIndex(0).GetPC(), + } + + # Register the ThreadFilterFrameProvider which only applies to thread ID 1 + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.ThreadFilterFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Check each thread + thread_id_1_found = False + for i in range(num_threads): + t = process.GetThreadAtIndex(i) + thread_id = t.GetIndexID() + + if thread_id == 1: + # Thread with ID 1 should have synthetic frame + thread_id_1_found = True + self.assertEqual( + t.GetNumFrames(), + 1, + f"Thread with ID 1 should have 1 synthetic frame", + ) + self.assertEqual( + t.GetFrameAtIndex(0).GetPC(), + 0xFFFF, + f"Thread with ID 1 should have synthetic PC 0xFFFF", + ) + else: + # Other threads should keep their original frames + self.assertEqual( + t.GetNumFrames(), + thread_info[thread_id]["frame_count"], + f"Thread with ID {thread_id} should not be affected by provider", + ) + self.assertEqual( + t.GetFrameAtIndex(0).GetPC(), + thread_info[thread_id]["pc"], + f"Thread with ID {thread_id} should have its original PC", + ) + + # We 
should have found at least one thread with ID 1 + self.assertTrue( + thread_id_1_found, + "Should have found a thread with ID 1 to test filtering", + ) + + def test_remove_frame_provider_by_id(self): + """Test that RemoveScriptedFrameProvider removes a specific provider by ID.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Import the test frame providers + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Get original frame count + original_frame_count = thread.GetNumFrames() + original_pc = thread.GetFrameAtIndex(0).GetPC() + + # Register the first provider and get its ID + error = lldb.SBError() + provider_id_1 = target.RegisterScriptedFrameProvider( + "test_frame_providers.ReplaceFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider 1: {error}") + + # Verify first provider is active (3 synthetic frames) + self.assertEqual(thread.GetNumFrames(), 3, "Should have 3 synthetic frames") + self.assertEqual( + thread.GetFrameAtIndex(0).GetPC(), 0x1000, "Should have first provider's PC" + ) + + # Register a second provider and get its ID + provider_id_2 = target.RegisterScriptedFrameProvider( + "test_frame_providers.PrependFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue(error.Success(), f"Failed to register provider 2: {error}") + + # Verify IDs are different + self.assertNotEqual( + provider_id_1, provider_id_2, "Provider IDs should be unique" + ) + + # Now remove the first provider by ID + result = target.RemoveScriptedFrameProvider(provider_id_1) + self.assertSuccess( + result, f"Should successfully remove provider with ID {provider_id_1}" + ) + + # After removing the first provider, the second provider should still be active + # The PrependFrameProvider adds 2 frames 
before the real stack + # Since ReplaceFrameProvider had 3 frames, and we removed it, we should now + # have the original frames (from real stack) with PrependFrameProvider applied + new_frame_count = thread.GetNumFrames() + self.assertEqual( + new_frame_count, + original_frame_count + 2, + "Should have original frames + 2 prepended frames", + ) + + # First two frames should be from PrependFrameProvider + self.assertEqual( + thread.GetFrameAtIndex(0).GetPC(), + 0x9000, + "First frame should be from PrependFrameProvider", + ) + self.assertEqual( + thread.GetFrameAtIndex(1).GetPC(), + 0xA000, + "Second frame should be from PrependFrameProvider", + ) + + # Remove the second provider + result = target.RemoveScriptedFrameProvider(provider_id_2) + self.assertSuccess( + result, f"Should successfully remove provider with ID {provider_id_2}" + ) + + # After removing both providers, frames should be back to original + self.assertEqual( + thread.GetNumFrames(), + original_frame_count, + "Should restore original frame count", + ) + self.assertEqual( + thread.GetFrameAtIndex(0).GetPC(), + original_pc, + "Should restore original PC", + ) + + # Try to remove a provider that doesn't exist + result = target.RemoveScriptedFrameProvider(999999) + self.assertTrue(result.Fail(), "Should fail to remove non-existent provider") + + def test_circular_dependency_fix(self): + """Test that accessing input_frames in __init__ doesn't cause circular dependency. + + This test verifies the fix for the circular dependency issue where: + 1. Thread::GetStackFrameList() creates the frame provider + 2. Provider's __init__ accesses input_frames and calls methods on frames + 3. SBFrame methods trigger ExecutionContextRef::GetFrameSP() + 4. Before the fix: GetFrameSP() would call Thread::GetStackFrameList() again -> circular dependency! + 5. 
After the fix: GetFrameSP() uses the remembered frame list -> no circular dependency + + The fix works by: + - StackFrame stores m_frame_list_wp (weak pointer to originating list) + - ExecutionContextRef stores m_frame_list_wp when created from a frame + - ExecutionContextRef::GetFrameSP() tries the remembered list first before asking the thread + """ + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Break here", lldb.SBFileSpec(self.source), only_one_thread=False + ) + + # Get original frame count and PC + original_frame_count = thread.GetNumFrames() + original_pc = thread.GetFrameAtIndex(0).GetPC() + self.assertGreaterEqual( + original_frame_count, 2, "Should have at least 2 real frames" + ) + + # Import the provider that accesses input frames in __init__ + script_path = os.path.join(self.getSourceDir(), "test_frame_providers.py") + self.runCmd("command script import " + script_path) + + # Register the CircularDependencyTestProvider + # Before the fix, this would crash or hang due to circular dependency + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "test_frame_providers.CircularDependencyTestProvider", + lldb.SBStructuredData(), + error, + ) + + # If we get here without crashing, the fix is working! 
+ self.assertTrue(error.Success(), f"Failed to register provider: {error}") + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify the provider worked correctly + # Should have 1 synthetic frame + all original frames + new_frame_count = thread.GetNumFrames() + self.assertEqual( + new_frame_count, + original_frame_count + 1, + "Should have original frames + 1 synthetic frame", + ) + + # First frame should be synthetic + frame0 = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0) + self.assertEqual( + frame0.GetPC(), + 0xDEADBEEF, + "First frame should be synthetic frame with PC 0xDEADBEEF", + ) + + # Second frame should be the original first frame + frame1 = thread.GetFrameAtIndex(1) + self.assertIsNotNone(frame1) + self.assertEqual( + frame1.GetPC(), + original_pc, + "Second frame should be original first frame", + ) + + # Verify we can still call methods on frames (no circular dependency!) + for i in range(min(3, new_frame_count)): + frame = thread.GetFrameAtIndex(i) + self.assertIsNotNone(frame) + # These calls should not trigger circular dependency + pc = frame.GetPC() + self.assertNotEqual(pc, 0, f"Frame {i} should have valid PC") diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/Makefile b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/TestFrameProviderCircularDependency.py b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/TestFrameProviderCircularDependency.py new file mode 100644 index 0000000000000..e03583a99425b --- /dev/null +++ 
b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/TestFrameProviderCircularDependency.py @@ -0,0 +1,117 @@ +""" +Test that frame providers wouldn't cause a hang due to a circular dependency +during its initialization. +""" + +import os +import lldb +from lldbsuite.test.lldbtest import TestBase +from lldbsuite.test import lldbutil + + +class FrameProviderCircularDependencyTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + self.source = "main.c" + + def test_circular_dependency_with_function_replacement(self): + """ + Test the circular dependency fix with a provider that replaces function names. + """ + self.build() + + target = self.dbg.CreateTarget(self.getBuildArtifact("a.out")) + self.assertTrue(target, "Target should be valid") + + bkpt = target.BreakpointCreateBySourceRegex( + "break here", lldb.SBFileSpec(self.source) + ) + self.assertTrue(bkpt.IsValid(), "Breakpoint should be valid") + self.assertEqual(bkpt.GetNumLocations(), 1, "Should have 1 breakpoint location") + + process = target.LaunchSimple(None, None, self.get_process_working_directory()) + self.assertTrue(process, "Process should be valid") + self.assertEqual( + process.GetState(), lldb.eStateStopped, "Process should be stopped" + ) + + thread = process.GetSelectedThread() + self.assertTrue(thread.IsValid(), "Thread should be valid") + + frame0 = thread.GetFrameAtIndex(0) + self.assertIn("bar", frame0.GetFunctionName(), "Should be stopped in bar()") + + original_frame_count = thread.GetNumFrames() + self.assertGreaterEqual( + original_frame_count, 3, "Should have at least 3 frames: bar, foo, main" + ) + + frame_names = [thread.GetFrameAtIndex(i).GetFunctionName() for i in range(3)] + self.assertEqual(frame_names[0], "bar", "Frame 0 should be bar") + self.assertEqual(frame_names[1], "foo", "Frame 1 should be foo") + self.assertEqual(frame_names[2], "main", "Frame 2 should be main") + + script_path = 
os.path.join(self.getSourceDir(), "frame_provider.py") + self.runCmd("command script import " + script_path) + + # Register the frame provider that accesses input_frames. + # Before the fix, this registration would trigger the circular dependency: + # - Thread::GetStackFrameList() creates provider + # - Provider's get_frame_at_index() accesses input_frames[0] + # - Calls frame.GetFunctionName() -> ExecutionContextRef::GetFrameSP() + # - Before fix: Calls Thread::GetStackFrameList() again -> CIRCULAR! + # - After fix: Uses remembered m_frame_list_wp -> Works! + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "frame_provider.ScriptedFrameObjectProvider", + lldb.SBStructuredData(), + error, + ) + + # If we reach here without crashing/hanging, the fix is working! + self.assertTrue( + error.Success(), + f"Should successfully register provider (if this fails, circular dependency!): {error}", + ) + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + # Verify the provider is working correctly. + # Frame count should be unchanged (we're replacing frames, not adding). + new_frame_count = thread.GetNumFrames() + self.assertEqual( + new_frame_count, + original_frame_count, + "Frame count should be unchanged (replacement, not addition)", + ) + + # Verify that "bar" was replaced with "baz". + frame0_new = thread.GetFrameAtIndex(0) + self.assertIsNotNone(frame0_new, "Frame 0 should exist") + self.assertEqual( + frame0_new.GetFunctionName(), + "baz", + "Frame 0 function should be replaced: bar -> baz", + ) + + # Verify other frames are unchanged. + frame1_new = thread.GetFrameAtIndex(1) + self.assertEqual( + frame1_new.GetFunctionName(), "foo", "Frame 1 should still be foo" + ) + + frame2_new = thread.GetFrameAtIndex(2) + self.assertEqual( + frame2_new.GetFunctionName(), "main", "Frame 2 should still be main" + ) + + # Verify we can call methods on all frames (no circular dependency!). 
+ for i in range(new_frame_count): + frame = thread.GetFrameAtIndex(i) + self.assertIsNotNone(frame, f"Frame {i} should exist") + # These calls should not trigger circular dependency. + pc = frame.GetPC() + self.assertNotEqual(pc, 0, f"Frame {i} should have valid PC") + func_name = frame.GetFunctionName() + self.assertIsNotNone(func_name, f"Frame {i} should have function name") diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/frame_provider.py b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/frame_provider.py new file mode 100644 index 0000000000000..f27f18cd07b7f --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/frame_provider.py @@ -0,0 +1,102 @@ +""" +Frame provider that reproduces the circular dependency issue. + +This provider accesses input_frames and calls methods on them, +which before the fix would cause a circular dependency. +""" + +import lldb +from lldb.plugins.scripted_process import ScriptedFrame +from lldb.plugins.scripted_frame_provider import ScriptedFrameProvider + + +class CustomScriptedFrame(ScriptedFrame): + """Custom scripted frame with full control over frame behavior.""" + + def __init__(self, thread, idx, pc, function_name): + args = lldb.SBStructuredData() + super().__init__(thread, args) + + self.idx = idx + self.pc = pc + self.function_name = function_name + + def get_id(self): + """Return the frame index.""" + return self.idx + + def get_pc(self): + """Return the program counter.""" + return self.pc + + def get_function_name(self): + """Return the function name.""" + return self.function_name + + def is_artificial(self): + """Mark as artificial frame.""" + return False + + def is_hidden(self): + """Not hidden.""" + return False + + def get_register_context(self): + return None + + +class ScriptedFrameObjectProvider(ScriptedFrameProvider): + """ + Provider that returns ScriptedFrame objects and accesses input_frames. 
+ + This provider demonstrates the circular dependency bug fix: + 1. During get_frame_at_index(), we access input_frames[idx] + 2. We call frame.GetFunctionName() and frame.GetPC() on input frames + 3. Before the fix: These calls would trigger ExecutionContextRef::GetFrameSP() + which would call Thread::GetStackFrameList() -> circular dependency! + 4. After the fix: ExecutionContextRef uses the remembered frame list -> no circular dependency + """ + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + self.replacement_count = 0 + if self.target.process: + baz_symbol_ctx = self.target.FindFunctions("baz") + self.baz_symbol_ctx = None + if len(baz_symbol_ctx) == 1: + self.baz_symbol_ctx = baz_symbol_ctx[0] + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider that replaces 'bar' function with 'baz'" + + def get_frame_at_index(self, idx): + """ + Replace frames named 'bar' with custom frames named 'baz'. + + This accesses input_frames and calls methods on them, which would + trigger the circular dependency bug before the fix. + """ + if idx < len(self.input_frames): + # This access and method calls would cause circular dependency before fix! + frame = self.input_frames[idx] + + # Calling GetFunctionName() triggers ExecutionContextRef resolution. + function_name = frame.GetFunctionName() + + if function_name == "bar" and self.baz_symbol_ctx: + # Replace "bar" with "baz". + baz_func = self.baz_symbol_ctx.GetFunction() + new_function_name = baz_func.GetName() + pc = baz_func.GetStartAddress().GetLoadAddress(self.target) + custom_frame = CustomScriptedFrame( + self.thread, idx, pc, new_function_name + ) + self.replacement_count += 1 + return custom_frame + + # Pass through other frames by returning their index. 
+ return idx + + return None diff --git a/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/main.c b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/main.c new file mode 100644 index 0000000000000..bbd1028236f40 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/circular_dependency/main.c @@ -0,0 +1,21 @@ +#include + +int baz() { + printf("baz\n"); + return 666; +} + +int bar() { + printf("bar\n"); + return 42; // break here. +} + +int foo() { + printf("foo\n"); + return bar(); +} + +int main() { + printf("main\n"); + return foo(); +} diff --git a/lldb/test/API/functionalities/scripted_frame_provider/main.cpp b/lldb/test/API/functionalities/scripted_frame_provider/main.cpp new file mode 100644 index 0000000000000..0298e88e4de16 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/main.cpp @@ -0,0 +1,53 @@ +// Multi-threaded test program for testing frame providers. + +#include +#include +#include +#include + +std::mutex mtx; +std::condition_variable cv; +int ready_count = 0; +constexpr int NUM_THREADS = 2; + +void thread_func(int thread_num) { + std::cout << "Thread " << thread_num << " started\n"; + + { + std::unique_lock lock(mtx); + ready_count++; + if (ready_count == NUM_THREADS + 1) { + cv.notify_all(); + } else { + cv.wait(lock, [] { return ready_count == NUM_THREADS + 1; }); + } + } + + std::cout << "Thread " << thread_num << " at breakpoint\n"; // Break here. 
+} + +int main(int argc, char **argv) { + std::thread threads[NUM_THREADS]; + + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = std::thread(thread_func, i); + } + + { + std::unique_lock lock(mtx); + ready_count++; + if (ready_count == NUM_THREADS + 1) { + cv.notify_all(); + } else { + cv.wait(lock, [] { return ready_count == NUM_THREADS + 1; }); + } + } + + std::cout << "Main thread at barrier\n"; + + for (int i = 0; i < NUM_THREADS; i++) + threads[i].join(); + + std::cout << "All threads completed\n"; + return 0; +} diff --git a/lldb/test/API/functionalities/scripted_frame_provider/test_frame_providers.py b/lldb/test/API/functionalities/scripted_frame_provider/test_frame_providers.py new file mode 100644 index 0000000000000..b9731fdc0a197 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/test_frame_providers.py @@ -0,0 +1,222 @@ +""" +Test frame providers for scripted frame provider functionality. + +These providers demonstrate various merge strategies: +- Replace: Replace entire stack +- Prepend: Add frames before real stack +- Append: Add frames after real stack + +It also shows the ability to mix a dictionary, a ScriptedFrame or an SBFrame +index to create stackframes +""" + +import lldb +from lldb.plugins.scripted_process import ScriptedFrame +from lldb.plugins.scripted_frame_provider import ScriptedFrameProvider + + +class ReplaceFrameProvider(ScriptedFrameProvider): + """Replace entire stack with custom frames.""" + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + self.frames = [ + { + "idx": 0, + "pc": 0x1000, + }, + 0, + { + "idx": 2, + "pc": 0x3000, + }, + ] + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Replace entire stack with 3 custom frames" + + def get_frame_at_index(self, index): + if index >= len(self.frames): + return None + return self.frames[index] + + +class PrependFrameProvider(ScriptedFrameProvider): + """Prepend synthetic 
frames before real stack.""" + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Prepend 2 synthetic frames before real stack" + + def get_frame_at_index(self, index): + if index == 0: + return {"pc": 0x9000} + elif index == 1: + return {"pc": 0xA000} + elif index - 2 < len(self.input_frames): + return index - 2 # Return real frame index. + return None + + +class AppendFrameProvider(ScriptedFrameProvider): + """Append synthetic frames after real stack.""" + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Append 1 synthetic frame after real stack" + + def get_frame_at_index(self, index): + if index < len(self.input_frames): + return index # Return real frame index. + elif index == len(self.input_frames): + return { + "idx": 1, + "pc": 0x10, + } + return None + + +class CustomScriptedFrame(ScriptedFrame): + """Custom scripted frame with full control over frame behavior.""" + + def __init__(self, thread, idx, pc, function_name): + args = lldb.SBStructuredData() + super().__init__(thread, args) + + self.idx = idx + self.pc = pc + self.function_name = function_name + + def get_id(self): + """Return the frame index.""" + return self.idx + + def get_pc(self): + """Return the program counter.""" + return self.pc + + def get_function_name(self): + """Return the function name.""" + return self.function_name + + def is_artificial(self): + """Mark as artificial frame.""" + return False + + def is_hidden(self): + """Not hidden.""" + return False + + def get_register_context(self): + """No register context for this test.""" + return None + + +class ScriptedFrameObjectProvider(ScriptedFrameProvider): + """Provider that returns ScriptedFrame objects instead of dictionaries.""" + + def __init__(self, input_frames, args): + 
super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider returning custom ScriptedFrame objects" + + def get_frame_at_index(self, index): + """Return ScriptedFrame objects or dictionaries based on index.""" + if index == 0: + return CustomScriptedFrame( + self.thread, 0, 0x5000, "custom_scripted_frame_0" + ) + elif index == 1: + return {"pc": 0x6000} + elif index == 2: + return CustomScriptedFrame( + self.thread, 2, 0x7000, "custom_scripted_frame_2" + ) + elif index == 3: + return len(self.input_frames) - 2 # Real frame index. + elif index == 4: + return len(self.input_frames) - 1 # Real frame index. + return None + + +class ThreadFilterFrameProvider(ScriptedFrameProvider): + """Provider that only applies to thread with ID 1.""" + + @staticmethod + def applies_to_thread(thread): + """Only apply to thread with index ID 1.""" + return thread.GetIndexID() == 1 + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider that only applies to thread ID 1" + + def get_frame_at_index(self, index): + """Return a single synthetic frame.""" + if index == 0: + return {"pc": 0xFFFF} + return None + + +class CircularDependencyTestProvider(ScriptedFrameProvider): + """ + Provider that tests the circular dependency fix. + + This provider accesses input_frames during __init__ and calls methods + on those frames. Before the fix, this would cause a circular dependency: + - Thread::GetStackFrameList() creates provider + - Provider's __init__ accesses input_frames[0] + - SBFrame::GetPC() tries to resolve ExecutionContextRef + - ExecutionContextRef::GetFrameSP() calls Thread::GetStackFrameList() + - Re-enters initialization -> circular dependency! + + With the fix, ExecutionContextRef remembers the frame list, so it doesn't + re-enter Thread::GetStackFrameList(). 
+ """ + + def __init__(self, input_frames, args): + super().__init__(input_frames, args) + + # This would cause circular dependency before the fix! + # Accessing frames and calling methods on them during __init__ + self.original_frame_count = len(input_frames) + self.original_pcs = [] + + # Call GetPC() on each input frame - this triggers ExecutionContextRef resolution. + for i in range(min(3, len(input_frames))): + frame = input_frames[i] + if frame.IsValid(): + pc = frame.GetPC() + self.original_pcs.append(pc) + + @staticmethod + def get_description(): + """Return a description of this provider.""" + return "Provider that tests circular dependency fix by accessing frames in __init__" + + def get_frame_at_index(self, index): + """Prepend a synthetic frame, then pass through original frames.""" + if index == 0: + # Synthetic frame at index 0. + return {"pc": 0xDEADBEEF} + elif index - 1 < self.original_frame_count: + # Pass through original frames at indices 1, 2, 3, ... + return index - 1 + return None diff --git a/lldb/test/API/python_api/exprpath_register/Makefile b/lldb/test/API/python_api/exprpath_register/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/python_api/exprpath_register/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/python_api/exprpath_register/TestExprPathRegisters.py b/lldb/test/API/python_api/exprpath_register/TestExprPathRegisters.py new file mode 100644 index 0000000000000..4ffbc5e49fb0d --- /dev/null +++ b/lldb/test/API/python_api/exprpath_register/TestExprPathRegisters.py @@ -0,0 +1,64 @@ +""" +Test Getting the expression path for registers works correctly +""" + +import lldb +from lldbsuite.test import lldbutil +from lldbsuite.test.lldbtest import TestBase, VALID_BREAKPOINT, VALID_TARGET + + +class TestExprPathRegisters(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def verify_register_path(self, reg_value: lldb.SBValue): + stream = 
lldb.SBStream() + reg_name = reg_value.name + self.assertTrue( + reg_value.GetExpressionPath(stream), + f"Expected an expression path for register {reg_name}.", + ) + reg_expr_path = stream.GetData() + self.assertEqual(reg_expr_path, f"${reg_name}") + + def test_float_registers(self): + """Verify the expression path of the registers is valid.""" + self.build() + _, _, thread, _ = lldbutil.run_to_name_breakpoint(self, "my_foo") + frame = thread.GetSelectedFrame() + self.assertTrue(frame, "Expected a valid Frame.") + + # possible floating point register on some cpus. + register_names = [ + "xmm0", + "ymm0", + "v0", + "v1", + "f0", + "f1", + "d0", + "d1", + "vr0", + "vr1", + "st0", + "st1", + ] + for name in register_names: + reg_value = frame.FindRegister(name) + # some the register will not be available for the cpu + # only verify if it is valid. + if reg_value: + self.verify_register_path(reg_value) + + def test_all_registers(self): + """Test all the registers that is avaiable on the machine""" + self.build() + _, _, thread, _ = lldbutil.run_to_name_breakpoint(self, "my_foo") + frame = thread.GetSelectedFrame() + self.assertTrue(frame, "Expected a valid Frame.") + + register_sets = frame.GetRegisters() + self.assertTrue(register_sets.IsValid(), "Expected Frame Registers") + + for register_set in register_sets: + for register in register_set.children: + self.verify_register_path(register) diff --git a/lldb/test/API/python_api/exprpath_register/main.c b/lldb/test/API/python_api/exprpath_register/main.c new file mode 100644 index 0000000000000..4809a87cdf210 --- /dev/null +++ b/lldb/test/API/python_api/exprpath_register/main.c @@ -0,0 +1,10 @@ + +float my_foo() { + float result = 10.0 + 20.0; + return result; +} + +int main(void) { + float result = my_foo(); + return (int)result; +} diff --git a/lldb/test/API/python_api/sbframe_extensions/Makefile b/lldb/test/API/python_api/sbframe_extensions/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- 
/dev/null +++ b/lldb/test/API/python_api/sbframe_extensions/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py b/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py new file mode 100644 index 0000000000000..d3eabfdd979c5 --- /dev/null +++ b/lldb/test/API/python_api/sbframe_extensions/TestSBFrameExtensions.py @@ -0,0 +1,534 @@ +""" +Test SBFrameExtensions API. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestSBFrameExtensions(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + self.source = "main.c" + + def _get_frame(self): + """Helper method to get a valid frame for testing.""" + self.build() + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "Set breakpoint here", lldb.SBFileSpec(self.source) + ) + frame = thread.GetFrameAtIndex(0) + self.assertTrue(frame.IsValid(), "Frame should be valid") + return frame, thread + + def test_property_pc(self): + """Test SBFrame extension property: pc""" + frame, _ = self._get_frame() + + pc = frame.pc + self.assertIsInstance(pc, int, "pc should be an integer") + self.assertGreater(pc, 0, "pc should be greater than 0") + self.assertEqual(pc, frame.GetPC(), "pc property should match GetPC()") + + def test_property_addr(self): + """Test SBFrame extension property: addr""" + frame, _ = self._get_frame() + + addr = frame.addr + self.assertTrue(addr.IsValid(), "addr should be valid") + self.assertEqual(addr, frame.GetPCAddress(), "addr should match GetPCAddress()") + + def test_property_fp(self): + """Test SBFrame extension property: fp""" + frame, _ = self._get_frame() + + fp = frame.fp + self.assertIsInstance(fp, int, "fp should be an integer") + self.assertEqual(fp, frame.GetFP(), "fp property should match GetFP()") + + def test_property_sp(self): + 
"""Test SBFrame extension property: sp""" + frame, _ = self._get_frame() + + sp = frame.sp + self.assertIsInstance(sp, int, "sp should be an integer") + self.assertEqual(sp, frame.GetSP(), "sp property should match GetSP()") + + def test_property_module(self): + """Test SBFrame extension property: module""" + frame, _ = self._get_frame() + + module = frame.module + self.assertTrue(module.IsValid(), "module should be valid") + self.assertEqual(module, frame.GetModule(), "module should match GetModule()") + + def test_property_compile_unit(self): + """Test SBFrame extension property: compile_unit""" + frame, _ = self._get_frame() + + compile_unit = frame.compile_unit + self.assertTrue(compile_unit.IsValid(), "compile_unit should be valid") + self.assertEqual( + compile_unit, + frame.GetCompileUnit(), + "compile_unit should match GetCompileUnit()", + ) + + def test_property_function(self): + """Test SBFrame extension property: function""" + frame, _ = self._get_frame() + + function = frame.function + self.assertTrue(function.IsValid(), "function should be valid") + self.assertEqual( + function, frame.GetFunction(), "function should match GetFunction()" + ) + + def test_property_symbol(self): + """Test SBFrame extension property: symbol""" + frame, _ = self._get_frame() + + symbol = frame.symbol + self.assertTrue(symbol.IsValid(), "symbol should be valid") + self.assertEqual(symbol, frame.GetSymbol(), "symbol should match GetSymbol()") + + def test_property_block(self): + """Test SBFrame extension property: block""" + frame, _ = self._get_frame() + + block = frame.block + self.assertTrue(block.IsValid(), "block should be valid") + block_direct = frame.GetBlock() + self.assertTrue(block_direct.IsValid(), "GetBlock() should return valid block") + # Verify both blocks are valid and have the same ranges + # by comparing their first range start address. 
+ block_ranges = block.GetRanges() + block_direct_ranges = block_direct.GetRanges() + if block_ranges.GetSize() > 0 and block_direct_ranges.GetSize() > 0: + self.assertEqual( + block.GetRangeStartAddress(0), + block_direct.GetRangeStartAddress(0), + "block should match GetBlock() start address", + ) + + def test_property_is_inlined(self): + """Test SBFrame extension property: is_inlined""" + frame, _ = self._get_frame() + + is_inlined = frame.is_inlined + self.assertIsInstance(is_inlined, bool, "is_inlined should be a boolean") + self.assertEqual( + is_inlined, frame.IsInlined(), "is_inlined should match IsInlined()" + ) + + def test_property_name(self): + """Test SBFrame extension property: name""" + frame, _ = self._get_frame() + + name = frame.name + self.assertIsInstance(name, str, "name should be a string") + self.assertEqual( + name, frame.GetFunctionName(), "name should match GetFunctionName()" + ) + # Should be one of our functions. + self.assertIn( + name, ["func1", "func2", "main"], "name should be a known function" + ) + + def test_property_line_entry(self): + """Test SBFrame extension property: line_entry""" + frame, _ = self._get_frame() + + line_entry = frame.line_entry + self.assertTrue(line_entry.IsValid(), "line_entry should be valid") + self.assertEqual( + line_entry, frame.GetLineEntry(), "line_entry should match GetLineEntry()" + ) + + def test_property_thread(self): + """Test SBFrame extension property: thread""" + frame, thread = self._get_frame() + + thread_prop = frame.thread + self.assertTrue(thread_prop.IsValid(), "thread should be valid") + self.assertEqual( + thread_prop, frame.GetThread(), "thread should match GetThread()" + ) + self.assertEqual( + thread_prop.GetThreadID(), + thread.GetThreadID(), + "thread should be the same thread", + ) + + def test_property_disassembly(self): + """Test SBFrame extension property: disassembly""" + frame, _ = self._get_frame() + + disassembly = frame.disassembly + self.assertIsInstance(disassembly, 
str, "disassembly should be a string") + self.assertGreater(len(disassembly), 0, "disassembly should not be empty") + self.assertEqual( + disassembly, frame.Disassemble(), "disassembly should match Disassemble()" + ) + + def test_property_idx(self): + """Test SBFrame extension property: idx""" + frame, _ = self._get_frame() + + idx = frame.idx + self.assertIsInstance(idx, int, "idx should be an integer") + self.assertEqual(idx, frame.GetFrameID(), "idx should match GetFrameID()") + self.assertEqual(idx, 0, "First frame should have idx 0") + + def test_property_variables(self): + """Test SBFrame extension property: variables""" + frame, _ = self._get_frame() + + variables = frame.variables + self.assertIsInstance( + variables, lldb.SBValueList, "variables should be SBValueList" + ) + all_vars = frame.GetVariables(True, True, True, True) + self.assertEqual( + variables.GetSize(), + all_vars.GetSize(), + "variables should match GetVariables(True, True, True, True)", + ) + + def test_property_vars(self): + """Test SBFrame extension property: vars (alias for variables)""" + frame, _ = self._get_frame() + + vars_prop = frame.vars + self.assertIsInstance(vars_prop, lldb.SBValueList, "vars should be SBValueList") + variables = frame.variables + self.assertEqual( + vars_prop.GetSize(), + variables.GetSize(), + "vars should match variables", + ) + + def test_property_locals(self): + """Test SBFrame extension property: locals""" + frame, _ = self._get_frame() + + locals_prop = frame.locals + self.assertIsInstance( + locals_prop, lldb.SBValueList, "locals should be SBValueList" + ) + locals_direct = frame.GetVariables(False, True, False, False) + self.assertEqual( + locals_prop.GetSize(), + locals_direct.GetSize(), + "locals should match GetVariables(False, True, False, False)", + ) + + def test_property_args(self): + """Test SBFrame extension property: args""" + frame, _ = self._get_frame() + + args_prop = frame.args + self.assertIsInstance(args_prop, lldb.SBValueList, "args 
should be SBValueList") + args_direct = frame.GetVariables(True, False, False, False) + self.assertEqual( + args_prop.GetSize(), + args_direct.GetSize(), + "args should match GetVariables(True, False, False, False)", + ) + + def test_property_arguments(self): + """Test SBFrame extension property: arguments (alias for args)""" + frame, _ = self._get_frame() + + arguments_prop = frame.arguments + self.assertIsInstance( + arguments_prop, lldb.SBValueList, "arguments should be SBValueList" + ) + args_prop = frame.args + self.assertEqual( + arguments_prop.GetSize(), + args_prop.GetSize(), + "arguments should match args", + ) + + def test_property_statics(self): + """Test SBFrame extension property: statics""" + frame, _ = self._get_frame() + + statics_prop = frame.statics + self.assertIsInstance( + statics_prop, lldb.SBValueList, "statics should be SBValueList" + ) + statics_direct = frame.GetVariables(False, False, True, False) + self.assertEqual( + statics_prop.GetSize(), + statics_direct.GetSize(), + "statics should match GetVariables(False, False, True, False)", + ) + + def test_property_registers(self): + """Test SBFrame extension property: registers""" + frame, _ = self._get_frame() + + registers = frame.registers + # registers returns an SBValueList that can be iterated. + self.assertTrue(hasattr(registers, "__iter__"), "registers should be iterable") + registers_direct = frame.GetRegisters() + # Compare by iterating and counting. 
+ registers_count = sum(1 for _ in registers) + registers_direct_count = sum(1 for _ in registers_direct) + self.assertEqual( + registers_count, + registers_direct_count, + "registers should match GetRegisters()", + ) + + def test_property_regs(self): + """Test SBFrame extension property: regs (alias for registers)""" + frame, _ = self._get_frame() + + regs = frame.regs + self.assertTrue(hasattr(regs, "__iter__"), "regs should be iterable") + registers = frame.registers + regs_count = sum(1 for _ in regs) + registers_count = sum(1 for _ in registers) + self.assertEqual(regs_count, registers_count, "regs should match registers") + + def test_property_register(self): + """Test SBFrame extension property: register (flattened view)""" + frame, _ = self._get_frame() + + register = frame.register + self.assertIsNotNone(register, "register should not be None") + # register is a helper object with __iter__ and __getitem__. + reg_names = set() + for reg in register: + self.assertTrue(reg.IsValid(), "Register should be valid") + reg_names.add(reg.name) + + # Test register indexing by name. + if len(reg_names) > 0: + first_reg_name = list(reg_names)[0] + reg_by_name = register[first_reg_name] + self.assertTrue(reg_by_name.IsValid(), "Register by name should be valid") + self.assertEqual( + reg_by_name.name, first_reg_name, "Register name should match" + ) + + def test_property_reg(self): + """Test SBFrame extension property: reg (alias for register)""" + frame, _ = self._get_frame() + + reg = frame.reg + self.assertIsNotNone(reg, "reg should not be None") + register = frame.register + reg_names = set() + for r in reg: + reg_names.add(r.name) + reg_names2 = set() + for r in register: + reg_names2.add(r.name) + self.assertEqual(reg_names, reg_names2, "reg should match register") + + def test_property_parent(self): + """Test SBFrame extension property: parent""" + frame0, thread = self._get_frame() + + # If there's a parent frame (frame 1), test parent property. 
+ if thread.GetNumFrames() > 1: + frame1 = thread.GetFrameAtIndex(1) + parent = frame0.parent + self.assertTrue(parent.IsValid(), "parent should be valid") + self.assertEqual( + parent.GetFrameID(), + frame1.GetFrameID(), + "parent should be the next frame", + ) + self.assertEqual( + parent.pc, frame1.GetPC(), "parent PC should match frame 1" + ) + + def test_property_child(self): + """Test SBFrame extension property: child""" + frame0, thread = self._get_frame() + + # Test child property (should be frame -1, which doesn't exist, so should return invalid). + child = frame0.child + # Child of frame 0 would be frame -1, which doesn't exist. + # So it should return an invalid frame. + if thread.GetNumFrames() == 1: + self.assertFalse(child.IsValid(), "child of only frame should be invalid") + + def test_method_get_all_variables(self): + """Test SBFrame extension method: get_all_variables()""" + frame, _ = self._get_frame() + + all_vars = frame.get_all_variables() + self.assertIsInstance( + all_vars, lldb.SBValueList, "get_all_variables should return SBValueList" + ) + all_vars_direct = frame.GetVariables(True, True, True, True) + self.assertEqual( + all_vars.GetSize(), + all_vars_direct.GetSize(), + "get_all_variables should match GetVariables(True, True, True, True)", + ) + + def test_method_get_arguments(self): + """Test SBFrame extension method: get_arguments()""" + frame, _ = self._get_frame() + + args = frame.get_arguments() + self.assertIsInstance( + args, lldb.SBValueList, "get_arguments should return SBValueList" + ) + args_direct = frame.GetVariables(True, False, False, False) + self.assertEqual( + args.GetSize(), + args_direct.GetSize(), + "get_arguments should match GetVariables(True, False, False, False)", + ) + + def test_method_get_locals(self): + """Test SBFrame extension method: get_locals()""" + frame, _ = self._get_frame() + + locals = frame.get_locals() + self.assertIsInstance( + locals, lldb.SBValueList, "get_locals should return SBValueList" + ) + 
locals_direct = frame.GetVariables(False, True, False, False) + self.assertEqual( + locals.GetSize(), + locals_direct.GetSize(), + "get_locals should match GetVariables(False, True, False, False)", + ) + + def test_method_get_statics(self): + """Test SBFrame extension method: get_statics()""" + frame, _ = self._get_frame() + + statics = frame.get_statics() + self.assertIsInstance( + statics, lldb.SBValueList, "get_statics should return SBValueList" + ) + statics_direct = frame.GetVariables(False, False, True, False) + self.assertEqual( + statics.GetSize(), + statics_direct.GetSize(), + "get_statics should match GetVariables(False, False, True, False)", + ) + + def test_method_var(self): + """Test SBFrame extension method: var()""" + frame, _ = self._get_frame() + + # Test var() method with a variable that should exist. + # First, let's see what variables are available. + all_vars = frame.GetVariables(True, True, True, True) + if all_vars.GetSize() > 0: + var_name = all_vars.GetValueAtIndex(0).GetName() + var_value = frame.var(var_name) + self.assertTrue(var_value.IsValid(), f"var('{var_name}') should be valid") + self.assertEqual( + var_value.GetName(), + var_name, + f"var('{var_name}') should return the correct variable", + ) + # Compare with GetValueForVariablePath. + var_direct = frame.GetValueForVariablePath(var_name) + self.assertEqual( + var_value.GetName(), + var_direct.GetName(), + "var() should match GetValueForVariablePath()", + ) + + # Test var() with non-existent variable. + invalid_var = frame.var("NonExistentVariable12345") + self.assertFalse( + invalid_var.IsValid(), "var() with non-existent variable should be invalid" + ) + + def test_method_get_parent_frame(self): + """Test SBFrame extension method: get_parent_frame()""" + frame0, thread = self._get_frame() + + # Test get_parent_frame. 
+ if thread.GetNumFrames() > 1: + parent = frame0.get_parent_frame() + self.assertTrue( + parent.IsValid(), "get_parent_frame should return valid frame" + ) + frame1 = thread.GetFrameAtIndex(1) + self.assertEqual( + parent.GetFrameID(), + frame1.GetFrameID(), + "get_parent_frame should return frame 1", + ) + else: + # If there's only one frame, parent should be invalid. + parent = frame0.get_parent_frame() + # Note: get_parent_frame might return an invalid frame if idx+1 is out of bounds. + + def test_method_get_child_frame(self): + """Test SBFrame extension method: get_child_frame()""" + frame0, thread = self._get_frame() + + # Test get_child_frame (frame -1 doesn't exist, so should be invalid). + child = frame0.get_child_frame() + if thread.GetNumFrames() == 1: + self.assertFalse( + child.IsValid(), "get_child_frame of only frame should be invalid" + ) + + def test_special_method_int(self): + """Test SBFrame extension special method: __int__""" + frame0, _ = self._get_frame() + + # Test __int__ (converts frame to its frame ID). + frame_id = int(frame0) + self.assertIsInstance(frame_id, int, "__int__ should return an integer") + self.assertEqual( + frame_id, frame0.GetFrameID(), "__int__ should return frame ID" + ) + + def test_special_method_hex(self): + """Test SBFrame extension special method: __hex__""" + frame0, _ = self._get_frame() + + # Test __hex__ (converts frame to its PC). + # Note: __hex__ returns the PC as an integer, not a hex string. + # In Python 3, hex() builtin calls __index__ if __hex__ doesn't exist, + # but since __hex__ is defined, it will be called. + pc_hex = frame0.__hex__() + self.assertIsInstance(pc_hex, int, "__hex__ should return an integer (PC)") + self.assertEqual(pc_hex, frame0.GetPC(), "__hex__ should return PC") + + def test_special_method_eq(self): + """Test SBFrame extension special method: __eq__ and __ne__""" + frame0, thread = self._get_frame() + + # Test __eq__ and __ne__. 
+ frame0_copy = thread.GetFrameAtIndex(0) + self.assertTrue(frame0 == frame0_copy, "Same frame should be equal") + self.assertFalse(frame0 != frame0_copy, "Same frame should not be not-equal") + + if thread.GetNumFrames() > 1: + frame1 = thread.GetFrameAtIndex(1) + self.assertFalse(frame0 == frame1, "Different frames should not be equal") + self.assertTrue(frame0 != frame1, "Different frames should be not-equal") + + def test_pc_property_settable(self): + """Test that pc property is settable""" + frame, _ = self._get_frame() + + original_pc = frame.GetPC() + # Test that we can set pc (though this might not work on all platforms). + # We'll just verify the property exists and can be read. + pc = frame.pc + self.assertIsInstance(pc, int, "pc should be readable") + # Note: Setting pc might not be supported on all platforms, so we just test reading. diff --git a/lldb/test/API/python_api/sbframe_extensions/main.c b/lldb/test/API/python_api/sbframe_extensions/main.c new file mode 100644 index 0000000000000..8e2d3ed8e5a5f --- /dev/null +++ b/lldb/test/API/python_api/sbframe_extensions/main.c @@ -0,0 +1,33 @@ +#include + +// Global and static variables for testing +int g_global_var = 42; +static int g_static_var = 100; + +// Function declarations +int func1(int arg1, char arg2); +int func2(int arg1, int arg2); + +int func1(int arg1, char arg2) { + static int static_var = 200; + int local1 = arg1 * 2; + char local2 = arg2; + // Set breakpoint here + return local1 + local2 + static_var; +} + +int func2(int arg1, int arg2) { + int local1 = arg1 + arg2; + int local2 = arg1 * arg2; + // Set breakpoint here + return func1(local1, 'X'); +} + +int main(int argc, char const *argv[]) { + int main_local = 10; + static int main_static = 50; + // Set breakpoint here + int result = func2(5, 7); + printf("Result: %d\n", result); + return 0; +} diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index b0d4e272d5646..0c8c39d37e089 100644 --- 
a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -226,7 +226,7 @@ def use_support_substitutions(config): except OSError: res = -1 if res == 0 and out: - sdk_path = str(out) + sdk_path = out.decode("utf-8") llvm_config.lit_config.note("using SDKROOT: %r" % sdk_path) host_flags += ["-isysroot", sdk_path] elif sys.platform != "win32": diff --git a/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp index 53e1810a5b0e0..2d30e089447f1 100644 --- a/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp @@ -23,7 +23,7 @@ llvm::Expected InitializeRequestHandler::Run( const InitializeRequestArguments &arguments) const { // Store initialization arguments for later use in Launch/Attach. dap.clientFeatures = arguments.supportedFeatures; - dap.sourceInitFile = arguments.lldbExtSourceInitFile.value_or(true); + dap.sourceInitFile = arguments.lldbExtSourceInitFile; return dap.GetCapabilities(); } diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp index d53a520ade39b..0a1d580bffd68 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp @@ -216,12 +216,13 @@ bool fromJSON(const json::Value &Params, InitializeRequestArguments &IRA, } return OM.map("adapterID", IRA.adapterID) && - OM.map("clientID", IRA.clientID) && - OM.map("clientName", IRA.clientName) && OM.map("locale", IRA.locale) && - OM.map("linesStartAt1", IRA.linesStartAt1) && - OM.map("columnsStartAt1", IRA.columnsStartAt1) && + OM.mapOptional("clientID", IRA.clientID) && + OM.mapOptional("clientName", IRA.clientName) && + OM.mapOptional("locale", IRA.locale) && + OM.mapOptional("linesStartAt1", IRA.linesStartAt1) && + OM.mapOptional("columnsStartAt1", IRA.columnsStartAt1) && OM.mapOptional("pathFormat", IRA.pathFormat) && - 
OM.map("$__lldb_sourceInitFile", IRA.lldbExtSourceInitFile); + OM.mapOptional("$__lldb_sourceInitFile", IRA.lldbExtSourceInitFile); } bool fromJSON(const json::Value &Params, Configuration &C, json::Path P) { diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 37fc2465f6a05..6a85033ae7ef2 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -108,23 +108,23 @@ struct InitializeRequestArguments { std::string adapterID; /// The ID of the client using this adapter. - std::optional clientID; + std::string clientID; /// The human-readable name of the client using this adapter. - std::optional clientName; + std::string clientName; /// The ISO-639 locale of the client using this adapter, e.g. en-US or de-CH. - std::optional locale; + std::string locale; /// Determines in what format paths are specified. The default is `path`, /// which is the native format. PathFormat pathFormat = ePatFormatPath; /// If true all line numbers are 1-based (default). - std::optional linesStartAt1; + bool linesStartAt1 = true; /// If true all column numbers are 1-based (default). - std::optional columnsStartAt1; + bool columnsStartAt1 = true; /// The set of supported features reported by the client. llvm::DenseSet supportedFeatures; @@ -133,7 +133,7 @@ struct InitializeRequestArguments { /// @{ /// Source init files when initializing lldb::SBDebugger. 
- std::optional lldbExtSourceInitFile; + bool lldbExtSourceInitFile = true; /// @} }; diff --git a/lldb/unittests/DAP/ProtocolRequestsTest.cpp b/lldb/unittests/DAP/ProtocolRequestsTest.cpp index ba9aef1e5fcc5..a74c369924b8e 100644 --- a/lldb/unittests/DAP/ProtocolRequestsTest.cpp +++ b/lldb/unittests/DAP/ProtocolRequestsTest.cpp @@ -77,7 +77,7 @@ TEST(ProtocolRequestsTest, EvaluateArguments) { EXPECT_EQ(expected->expression, "hello world"); EXPECT_EQ(expected->context, eEvaluateContextRepl); - // Check required keys; + // Check required keys. EXPECT_THAT_EXPECTED(parse(R"({})"), FailedWithMessage("missing value at (root).expression")); } @@ -118,3 +118,67 @@ TEST(ProtocolRequestsTest, EvaluateResponseBody) { ASSERT_THAT_EXPECTED(expected_opt, llvm::Succeeded()); EXPECT_EQ(PrettyPrint(*expected_opt), PrettyPrint(body)); } + +TEST(ProtocolRequestsTest, InitializeRequestArguments) { + llvm::Expected expected = + parse(R"({"adapterID": "myid"})"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(expected->adapterID, "myid"); + + // Check optional keys. 
+ expected = parse(R"({ + "adapterID": "myid", + "clientID": "myclientid", + "clientName": "lldb-dap-unit-tests", + "locale": "en-US", + "linesStartAt1": true, + "columnsStartAt1": true, + "pathFormat": "uri", + "supportsVariableType": true, + "supportsVariablePaging": true, + "supportsRunInTerminalRequest": true, + "supportsMemoryReferences": true, + "supportsProgressReporting": true, + "supportsInvalidatedEvent": true, + "supportsMemoryEvent": true, + "supportsArgsCanBeInterpretedByShell": true, + "supportsStartDebuggingRequest": true, + "supportsANSIStyling": true + })"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(expected->adapterID, "myid"); + EXPECT_EQ(expected->clientID, "myclientid"); + EXPECT_EQ(expected->clientName, "lldb-dap-unit-tests"); + EXPECT_EQ(expected->locale, "en-US"); + EXPECT_EQ(expected->linesStartAt1, true); + EXPECT_EQ(expected->columnsStartAt1, true); + EXPECT_EQ(expected->pathFormat, ePathFormatURI); + EXPECT_EQ(expected->supportedFeatures.contains(eClientFeatureVariableType), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureRunInTerminalRequest), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureMemoryReferences), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureProgressReporting), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureInvalidatedEvent), + true); + EXPECT_EQ(expected->supportedFeatures.contains(eClientFeatureMemoryEvent), + true); + EXPECT_EQ(expected->supportedFeatures.contains( + eClientFeatureArgsCanBeInterpretedByShell), + true); + EXPECT_EQ( + expected->supportedFeatures.contains(eClientFeatureStartDebuggingRequest), + true); + EXPECT_EQ(expected->supportedFeatures.contains(eClientFeatureANSIStyling), + true); + + // Check required keys. 
+ EXPECT_THAT_EXPECTED(parse(R"({})"), + FailedWithMessage("missing value at (root).adapterID")); +} diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index 0126c408d8696..f264fb3ce94e5 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -1217,3 +1217,107 @@ TEST_F(DWARFExpressionMockProcessTestWithAArch, DW_op_deref_no_ptr_fixing) { llvm::Expected result_deref = evaluate_expr(expr_deref); EXPECT_THAT_EXPECTED(result_deref, ExpectLoadAddress(expected_value)); } + +TEST_F(DWARFExpressionMockProcessTest, deref_register) { + TestContext test_ctx; + constexpr uint32_t reg_r0 = 0x504; + MockMemory::Map memory = { + {{0x004, 4}, {0x1, 0x2, 0x3, 0x4}}, + {{0x504, 4}, {0xa, 0xb, 0xc, 0xd}}, + {{0x505, 4}, {0x5, 0x6, 0x7, 0x8}}, + }; + ASSERT_TRUE(CreateTestContext(&test_ctx, "i386-pc-linux", + RegisterValue(reg_r0), memory, memory)); + + ExecutionContext exe_ctx(test_ctx.process_sp); + MockDwarfDelegate delegate = MockDwarfDelegate::Dwarf5(); + auto Eval = [&](llvm::ArrayRef expr_data) { + ExecutionContext exe_ctx(test_ctx.process_sp); + return Evaluate(expr_data, {}, &delegate, &exe_ctx, + test_ctx.reg_ctx_sp.get()); + }; + + // Reads from the register r0. + // Sets the context to RegisterInfo so we know this is a register location. + EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0}), + ExpectScalar(reg_r0, Value::ContextType::RegisterInfo)); + + // Reads from the location(register r0). + // Clears the context so we know this is a value not a location. + EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0, DW_OP_deref}), + ExpectLoadAddress(reg_r0, Value::ContextType::Invalid)); + + // Reads from the location(register r0) and adds the value to the host buffer. + // The evaluator should implicitly convert it to a memory location when + // added to a composite value and should add the contents of memory[r0] + // to the host buffer. 
+ EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0, DW_OP_deref, DW_OP_piece, 4}), + ExpectHostAddress({0xa, 0xb, 0xc, 0xd})); + + // Reads from the location(register r0) and truncates the value to one byte. + // Clears the context so we know this is a value not a location. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_reg0, DW_OP_deref_size, 1}), + ExpectLoadAddress(reg_r0 & 0xff, Value::ContextType::Invalid)); + + // Reads from the location(register r0) and truncates to one byte then adds + // the value to the host buffer. The evaluator should implicitly convert it to + // a memory location when added to a composite value and should add the + // contents of memory[r0 & 0xff] to the host buffer. + EXPECT_THAT_EXPECTED(Eval({DW_OP_reg0, DW_OP_deref_size, 1, DW_OP_piece, 4}), + ExpectHostAddress({0x1, 0x2, 0x3, 0x4})); + + // Reads from the register r0 + 1. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_breg0, 1}), + ExpectLoadAddress(reg_r0 + 1, Value::ContextType::Invalid)); + + // Reads from address r0 + 1, which contains the bytes [5,6,7,8]. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_breg0, 1, DW_OP_deref}), + ExpectLoadAddress(0x08070605, Value::ContextType::Invalid)); +} + +TEST_F(DWARFExpressionMockProcessTest, deref_implicit_value) { + TestContext test_ctx; + MockMemory::Map memory = { + {{0x4, 1}, {0x1}}, + {{0x4, 4}, {0x1, 0x2, 0x3, 0x4}}, + }; + ASSERT_TRUE(CreateTestContext(&test_ctx, "i386-pc-linux", {}, memory)); + + ExecutionContext exe_ctx(test_ctx.process_sp); + MockDwarfDelegate delegate = MockDwarfDelegate::Dwarf5(); + auto Eval = [&](llvm::ArrayRef expr_data) { + ExecutionContext exe_ctx(test_ctx.process_sp); + return Evaluate(expr_data, {}, &delegate, &exe_ctx, + test_ctx.reg_ctx_sp.get()); + }; + + // Creates an implicit location with a value of 4. + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_stack_value}), + ExpectScalar(0x4)); + + // Creates an implicit location with a value of 4. 
The deref reads the value + // out of the location and implicitly converts it to a load address. + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_stack_value, DW_OP_deref}), + ExpectLoadAddress(0x4)); + + // Creates an implicit location with a value of 0x504 (uleb128(0x504) = + // 0xa84). The deref reads the low byte out of the location and implicitly + // converts it to a load address. + EXPECT_THAT_EXPECTED( + Eval({DW_OP_constu, 0x84, 0xa, DW_OP_stack_value, DW_OP_deref_size, 1}), + ExpectLoadAddress(0x4)); + + // The tests below are similar to the ones above, but there is no implicit + // location created by a stack_value operation. They are provided here as a + // reference to contrast with the above tests. + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4}), ExpectLoadAddress(0x4)); + + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_deref}), + ExpectLoadAddress(0x04030201)); + + EXPECT_THAT_EXPECTED(Eval({DW_OP_lit4, DW_OP_deref_size, 1}), + ExpectLoadAddress(0x01)); +} diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index a63b740d9472f..5694aeeff3e5b 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -136,6 +136,11 @@ lldb_private::python::LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data) { return nullptr; } +void * +lldb_private::python::LLDBSWIGPython_CastPyObjectToSBThread(PyObject *data) { + return nullptr; +} + void * lldb_private::python::LLDBSWIGPython_CastPyObjectToSBFrame(PyObject *data) { return nullptr; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 569407963695e..483afb426fa10 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -35,9 +35,6 @@ class LLVM_ABI InstructionSelector : public 
GIMatchTableExecutor { /// !isPreISelGenericOpcode(I.getOpcode()) virtual bool select(MachineInstr &I) = 0; - // FIXME: Eliminate dependency on TargetPassConfig for NewPM transition - const TargetPassConfig *TPC = nullptr; - MachineOptimizationRemarkEmitter *MORE = nullptr; /// Note: InstructionSelect does not track changed instructions. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index 076c70d21bbdf..6060bb6144c62 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -510,9 +510,6 @@ class RegBankSelect : public MachineFunctionPass { /// Optimization mode of the pass. Mode OptMode; - /// Current target configuration. Controls how the pass handles errors. - const TargetPassConfig *TPC; - /// Assign the register bank of each operand of \p MI. /// \return True on success, false otherwise. bool assignInstr(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index e1aa8eceefd3f..da2742e089f8f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -155,12 +155,10 @@ LLVM_ABI bool isTriviallyDead(const MachineInstr &MI, /// Report an ISel error as a missed optimization remark to the LLVMContext's /// diagnostic stream. Set the FailedISel MachineFunction property. LLVM_ABI void reportGISelFailure(MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); LLVM_ABI void reportGISelFailure(MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, const char *PassName, StringRef Msg, const MachineInstr &MI); @@ -168,7 +166,6 @@ LLVM_ABI void reportGISelFailure(MachineFunction &MF, /// Report an ISel warning as a missed optimization remark to the LLVMContext's /// diagnostic stream. 
LLVM_ABI void reportGISelWarning(MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index b2697c81fd825..149366c69bdcc 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1243,7 +1243,7 @@ class LLVM_ABI TargetLoweringBase { /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and store the intrinsic information into the IntrinsicInfo that was /// passed to the function. - virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, + virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &, MachineFunction &, unsigned /*Intrinsic*/) const { return false; diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 3787e2591a4c1..3b475c8d5614d 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -972,6 +972,13 @@ def int_arm_mve_vmaxnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vminnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_vmaxnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + multiclass MVEPredicated rets, list params, LLVMType pred = llvm_anyvector_ty, list props = [IntrNoMem], diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 60d3535ea6097..5f6718d6cbcd8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -13998,7 +13998,15 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, } namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::LoopDisposition LD) 
{ +// Note: these overloaded operators need to be in the llvm namespace for them +// to be resolved correctly. If we put them outside the llvm namespace, the +// +// OS << ": " << SE.getLoopDisposition(SV, InnerL); +// +// code below "breaks" and start printing raw enum values as opposed to the +// string values. +static raw_ostream &operator<<(raw_ostream &OS, + ScalarEvolution::LoopDisposition LD) { switch (LD) { case ScalarEvolution::LoopVariant: OS << "Variant"; @@ -14013,7 +14021,8 @@ raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::LoopDisposition LD) { return OS; } -raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::BlockDisposition BD) { +static raw_ostream &operator<<(raw_ostream &OS, + llvm::ScalarEvolution::BlockDisposition BD) { switch (BD) { case ScalarEvolution::DoesNotDominateBlock: OS << "DoesNotDominate"; diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp index bce41f9f5329e..4e422539ff9f6 100644 --- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp @@ -29,8 +29,6 @@ class Type; using namespace llvm; -namespace { - static inline int sizeOfSCEV(const SCEV *S) { struct FindSCEVSize { int Size = 0; @@ -52,8 +50,6 @@ static inline int sizeOfSCEV(const SCEV *S) { return F.Size; } -} // namespace - // Computes the Quotient and Remainder of the division of Numerator by // Denominator. 
void SCEVDivision::divide(ScalarEvolution &SE, const SCEV *Numerator, diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index dbceb8e557849..eb8650fd0eb60 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5877,6 +5877,12 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, break; } case Instruction::ShuffleVector: { + // Handle vector splat idiom + if (Value *Splat = getSplatValue(V)) { + computeKnownFPClass(Splat, Known, InterestedClasses, Q, Depth + 1); + break; + } + // For undef elements, we don't know anything about the common state of // the shuffle result. APInt DemandedLHS, DemandedRHS; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 88e554244c663..ad821231035f0 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -111,17 +111,18 @@ INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) static void reportTranslationError(MachineFunction &MF, - const TargetPassConfig &TPC, OptimizationRemarkEmitter &ORE, OptimizationRemarkMissed &R) { MF.getProperties().setFailedISel(); + bool IsGlobalISelAbortEnabled = + MF.getTarget().Options.GlobalISelAbort == GlobalISelAbortMode::Enable; // Print the function name explicitly if we don't have a debug location (which // makes the diagnostic less useful) or if we're going to emit a raw error. 
- if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled()) + if (!R.getLocation().isValid() || IsGlobalISelAbortEnabled) R << (" (in function: " + MF.getName() + ")").str(); - if (TPC.isGlobalISelAbortEnabled()) + if (IsGlobalISelAbortEnabled) report_fatal_error(Twine(R.getMsg())); else ORE.emit(R); @@ -242,7 +243,7 @@ ArrayRef IRTranslator::getOrCreateVRegs(const Value &Val) { MF->getFunction().getSubprogram(), &MF->getFunction().getEntryBlock()); R << "unable to translate constant: " << ore::NV("Type", Val.getType()); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return *VRegs; } } @@ -279,7 +280,7 @@ Align IRTranslator::getMemOpAlign(const Instruction &I) { OptimizationRemarkMissed R("gisel-irtranslator", "", &I); R << "unable to translate memop: " << ore::NV("Opcode", &I); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return Align(1); } @@ -4150,7 +4151,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to translate in big endian mode"; - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4194,7 +4195,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower function: " << ore::NV("Prototype", F.getFunctionType()); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4217,7 +4218,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getFunctionType()); - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4268,7 +4269,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { R << 
": '" << InstStrStorage << "'"; } - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } @@ -4276,7 +4277,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", BB->getTerminator()->getDebugLoc(), BB); R << "unable to translate basic block"; - reportTranslationError(*MF, *TPC, *ORE, R); + reportTranslationError(*MF, *ORE, R); return false; } } diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 2dd22c8a7e8ba..1d281ab83aacc 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -137,7 +137,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; ISel = MF.getSubtarget().getInstructionSelector(); - ISel->TPC = &getAnalysis(); // FIXME: Properly override OptLevel in TargetMachine. See OptLevelChanger CodeGenOptLevel OldOptLevel = OptLevel; @@ -159,7 +158,6 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); assert(ISel && "Cannot work without InstructionSelector"); - const TargetPassConfig &TPC = *ISel->TPC; CodeGenCoverage CoverageInfo; ISel->setupMF(MF, VT, &CoverageInfo, PSI, BFI); @@ -177,8 +175,8 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { // property check already is. if (!DisableGISelLegalityCheck) if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { - reportGISelFailure(MF, TPC, MORE, "gisel-select", - "instruction is not legal", *MI); + reportGISelFailure(MF, MORE, "gisel-select", "instruction is not legal", + *MI); return false; } // FIXME: We could introduce new blocks and will need to fix the outer loop. 
@@ -215,8 +213,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { if (!selectInstr(MI)) { LLVM_DEBUG(dbgs() << "Selection failed!\n"; MIIMaintainer.reportFullyCreatedInstrs()); - reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", - MI); + reportGISelFailure(MF, MORE, "gisel-select", "cannot select", MI); return false; } LLVM_DEBUG(MIIMaintainer.reportFullyCreatedInstrs()); @@ -279,7 +276,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg); if (!RC) { - reportGISelFailure(MF, TPC, MORE, "gisel-select", + reportGISelFailure(MF, MORE, "gisel-select", "VReg has no regclass after selection", *MI); return false; } @@ -288,7 +285,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { if (Ty.isValid() && TypeSize::isKnownGT(Ty.getSizeInBits(), TRI.getRegSizeInBits(*RC))) { reportGISelFailure( - MF, TPC, MORE, "gisel-select", + MF, MORE, "gisel-select", "VReg's low-level type and register class have different sizes", *MI); return false; } @@ -299,7 +296,7 @@ bool InstructionSelect::selectMachineFunction(MachineFunction &MF) { MF.getFunction().getSubprogram(), /*MBB=*/nullptr); R << "inserting blocks is not supported yet"; - reportGISelFailure(MF, TPC, MORE, R); + reportGISelFailure(MF, MORE, R); return false; } #endif diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index aef16b5f33af4..0f0656aaa4f45 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -348,7 +348,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { *MIRBuilder, VT); if (Result.FailedOn) { - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + reportGISelFailure(MF, MORE, "gisel-legalize", "unable to legalize instruction", *Result.FailedOn); return false; } @@ -360,7 +360,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { R << "lost " << 
ore::NV("NumLostDebugLocs", LocObserver.getNumLostDebugLocs()) << " debug locations during pass"; - reportGISelWarning(MF, TPC, MORE, R); + reportGISelWarning(MF, MORE, R); // Example remark: // --- !Missed // Pass: gisel-legalize diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index bcb4f1c551cfd..5db631be32acd 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -83,7 +84,6 @@ void RegBankSelect::init(MachineFunction &MF) { assert(RBI && "Cannot work without RegisterBankInfo"); MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); - TPC = &getAnalysis(); if (OptMode != Mode::Fast) { MBFI = &getAnalysis().getMBFI(); MBPI = &getAnalysis().getMBPI(); @@ -308,7 +308,8 @@ const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( RepairPts.emplace_back(std::move(RepairPt)); } } - if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) { + if (!BestMapping && MI.getMF()->getTarget().Options.GlobalISelAbort != + GlobalISelAbortMode::Enable) { // If none of the mapping worked that means they are all impossible. // Thus, pick the first one and set an impossible repairing point. // It will trigger the failed isel mode. 
@@ -708,7 +709,7 @@ bool RegBankSelect::assignRegisterBanks(MachineFunction &MF) { continue; if (!assignInstr(MI)) { - reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", + reportGISelFailure(MF, *MORE, "gisel-regbankselect", "unable to map instruction", MI); return false; } @@ -722,7 +723,7 @@ bool RegBankSelect::checkFunctionIsLegal(MachineFunction &MF) const { #ifndef NDEBUG if (!DisableGISelLegalityCheck) { if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { - reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", + reportGISelFailure(MF, *MORE, "gisel-regbankselect", "instruction is not legal", *MI); return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index bc01cb65c4a69..15e81f5773b69 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -234,11 +234,11 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, static void reportGISelDiagnostic(DiagnosticSeverity Severity, MachineFunction &MF, - const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R) { - bool IsFatal = Severity == DS_Error && - TPC.isGlobalISelAbortEnabled(); + bool IsGlobalISelAbortEnabled = + MF.getTarget().Options.GlobalISelAbort == GlobalISelAbortMode::Enable; + bool IsFatal = Severity == DS_Error && IsGlobalISelAbortEnabled; // Print the function name explicitly if we don't have a debug location (which // makes the diagnostic less useful) or if we're going to emit a raw error. 
if (!R.getLocation().isValid() || IsFatal) @@ -250,20 +250,20 @@ static void reportGISelDiagnostic(DiagnosticSeverity Severity, MORE.emit(R); } -void llvm::reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, +void llvm::reportGISelWarning(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R) { - reportGISelDiagnostic(DS_Warning, MF, TPC, MORE, R); + reportGISelDiagnostic(DS_Warning, MF, MORE, R); } -void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, +void llvm::reportGISelFailure(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R) { MF.getProperties().setFailedISel(); - reportGISelDiagnostic(DS_Error, MF, TPC, MORE, R); + reportGISelDiagnostic(DS_Error, MF, MORE, R); } -void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, +void llvm::reportGISelFailure(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, const char *PassName, StringRef Msg, const MachineInstr &MI) { @@ -271,9 +271,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, MI.getDebugLoc(), MI.getParent()); R << Msg; // Printing MI is expensive; only do it if expensive remarks are enabled. 
- if (TPC.isGlobalISelAbortEnabled() || MORE.allowExtraAnalysis(PassName)) + if (MF.getTarget().Options.GlobalISelAbort == GlobalISelAbortMode::Enable || + MORE.allowExtraAnalysis(PassName)) R << ": " << ore::MNV("Inst", MI); - reportGISelFailure(MF, TPC, MORE, R); + reportGISelFailure(MF, MORE, R); } unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) { diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a02af59600c44..4e242311e290f 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/StackSafetyAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d3d57b99a6345..a83185d6ade20 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17227,7 +17227,7 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, template static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, - AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) { + AArch64TargetLowering::IntrinsicInfo &Info, const CallBase &CI) { Info.opc = ISD::INTRINSIC_VOID; // Retrieve EC from first vector argument. const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType()); @@ -17252,7 +17252,7 @@ setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. 
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { auto &DL = I.getDataLayout(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index b6d34f97c7b48..1d4446d287462 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -206,7 +206,7 @@ class AArch64TargetLowering : public TargetLowering { EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5dea64844e64e..215ef67a9bc4f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2124,7 +2124,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureBVHDualAndBVH8Insts, FeatureWaitsBeforeSystemScopeStores, FeatureD16Writes32BitVgpr, - FeatureCubeInsts, + FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, @@ -2137,7 +2137,6 @@ def FeatureISAVersion12_50_Common : FeatureSet< [FeatureGFX12, FeatureGFX1250Insts, FeatureRequiresAlignedVGPRs, - FeatureAddressableLocalMemorySize327680, FeatureCuMode, Feature1024AddressableVGPRs, Feature64BitLiterals, @@ -2206,17 +2205,18 @@ def FeatureISAVersion12_50_Common : FeatureSet< FeatureXNACK, FeatureClusters, FeatureD16Writes32BitVgpr, -]>; - -def FeatureISAVersion12_50 : FeatureSet< - !listconcat(FeatureISAVersion12_50_Common.Features, - [FeatureCubeInsts, + FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, - FeatureCvtPkNormVOP3Insts])>; + FeatureCvtPkNormVOP3Insts +]>; + +def FeatureISAVersion12_50 : FeatureSet< + 
!listconcat(FeatureISAVersion12_50_Common.Features, + [FeatureAddressableLocalMemorySize327680])>; def FeatureISAVersion12_51 : FeatureSet< !listconcat(FeatureISAVersion12_50.Features, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 78a3ec7f0c266..8698e816ddbb9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -4451,16 +4451,14 @@ bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const { const auto *Ld = cast(N); const MachineMemOperand *MMO = Ld->getMemOperand(); - if (Ld->isDivergent()) { - // FIXME: We ought to able able to take the direct isDivergent result. We - // cannot rely on the MMO for a uniformity check, and should stop using - // it. This is a hack for 2 ways that the IR divergence analysis is superior - // to the DAG divergence: Recognizing shift-of-workitem-id as always - // uniform, and isSingleLaneExecution. These should be handled in the DAG - // version, and then this can be dropped. - if (!MMO->getValue() || !AMDGPU::isUniformMMO(MMO)) - return false; - } + // FIXME: We ought to able able to take the direct isDivergent result. We + // cannot rely on the MMO for a uniformity check, and should stop using + // it. This is a hack for 2 ways that the IR divergence analysis is superior + // to the DAG divergence: Recognizing shift-of-workitem-id as always + // uniform, and isSingleLaneExecution. These should be handled in the DAG + // version, and then this can be dropped. 
+ if (Ld->isDivergent() && !AMDGPU::isUniformMMO(MMO)) + return false; return MMO->getSize().hasValue() && Ld->getAlign() >= diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index b7b87674ee658..2b1f4048947bf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -35,15 +35,13 @@ bool AMDGPU::isUniformMMO(const MachineMemOperand *MMO) { PSV->isJumpTable(); } - // FIXME: null value is should be treated as unknown, not as uniform. - return true; + // Unknown value. + return false; } // UndefValue means this is a load of a kernel input. These are uniform. // Sometimes LDS instructions have constant pointers. - // If Ptr is null, then that means this mem operand contains a - // PseudoSourceValue like GOT. - if (!Ptr || isa(Ptr)) + if (isa(Ptr)) return true; if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 6e41134bb3bc1..ae62dbe1cc706 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2362,7 +2362,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture( if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR)) return Register(); - // TODO: can we be smarter about machine pointer info? 
+ // TODO: Use custom PseudoSourceValue MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); // Offset into amd_queue_t for group_segment_aperture_base_hi / diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index 5c80a9762ff69..11cafdec8d3c3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -100,8 +100,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl { /// Compute the register class constraints based on the uses of \p Reg, /// excluding MFMA uses from which can be rewritten to change the register - /// class constraint. This should be nearly identical to - /// MachineRegisterInfo::recomputeRegClass. + /// class constraint. MFMA scale operands need to be constraint checked. + /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass. /// \p RewriteCandidates will collect the set of MFMA instructions that need /// to have the opcode mutated to perform the replacement. @@ -155,9 +155,16 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the // effects of rewrite candidates. It just so happens that we can use - // either AGPR or VGPR in src0/src1, so don't bother checking the - // constraint effects of the individual operands. + // either AGPR or VGPR in src0/src1. We still need to check constraint + // effects for scale variant, which does not allow AGPR. 
if (isRewriteCandidate(*MI)) { + int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); + const MCInstrDesc &AGPRDesc = TII.get(AGPROp); + const TargetRegisterClass *NewRC = + TII.getRegClass(AGPRDesc, MO.getOperandNo()); + if (!TRI.hasAGPRs(NewRC)) + return false; + const MachineOperand *VDst = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); const MachineOperand *Src2 = diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 55370a54f07a0..70f8c0cc2ae9c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1309,7 +1309,7 @@ static unsigned getIntrMemWidth(unsigned IntrID) { } } -static void getCoopAtomicOperandsInfo(const CallInst &CI, bool IsLoad, +static void getCoopAtomicOperandsInfo(const CallBase &CI, bool IsLoad, TargetLoweringBase::IntrinsicInfo &Info) { Value *OrderingArg = CI.getArgOperand(IsLoad ? 1 : 2); unsigned Ord = cast(OrderingArg)->getZExtValue(); @@ -1339,7 +1339,7 @@ static void getCoopAtomicOperandsInfo(const CallInst &CI, bool IsLoad, } bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &CI, + const CallBase &CI, MachineFunction &MF, unsigned IntrID) const { Info.flags = MachineMemOperand::MONone; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 40c03ca024c6c..fb162948caf4c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -334,7 +334,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { MVT getPointerTy(const DataLayout &DL, unsigned AS) const override; MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, + bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &, MachineFunction &MF, unsigned IntrinsicID) const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp 
index 1a8c470600394..2d26c67a8077a 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -20665,7 +20665,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 8191eb40a712a..d0fb58c764edd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -315,8 +315,7 @@ class VectorType; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize = false) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index f9aaacb7f5250..097318711d137 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -393,6 +393,12 @@ def vsub : PatFrags<(ops node:$lhs, node:$rhs), def vmul : PatFrags<(ops node:$lhs, node:$rhs), [(fmul node:$lhs, node:$rhs), (int_arm_mve_vmul node:$lhs, node:$rhs)]>; +def vminnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fminnum node:$lhs, node:$rhs), + (int_arm_mve_vminnm node:$lhs, node:$rhs)]>; +def vmaxnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fmaxnum node:$lhs, node:$rhs), + (int_arm_mve_vmaxnm node:$lhs, node:$rhs)]>; // --------- Start of base classes for the instructions themselves @@ -1489,7 +1495,7 @@ class MVE_VMINMAXNM sz, bit bit_21, let validForTailPredication = 1; } -multiclass MVE_VMINMAXNM_m { +multiclass MVE_VMINMAXNM_m { def "" : MVE_VMINMAXNM; let Predicates = [HasMVEFloat] in { 
@@ -1497,10 +1503,10 @@ multiclass MVE_VMINMAXNM_m; -defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>; -defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>; +defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, vminnm, int_arm_mve_min_predicated>; +defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, vminnm, int_arm_mve_min_predicated>; class MVE_VMINMAX size, @@ -4148,7 +4154,7 @@ class MVE_VMAXMINNMA size, bit bit_12, } multiclass MVE_VMAXMINNMA_m { def "" : MVE_VMAXMINNMA; defvar Inst = !cast(NAME); @@ -4168,13 +4174,13 @@ multiclass MVE_VMAXMINNMA_m - : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vmaxnma", VTI, vmaxnm, int_arm_mve_vmaxnma_predicated, bit_12>; defm MVE_VMAXNMAf32 : MVE_VMAXNMA; defm MVE_VMAXNMAf16 : MVE_VMAXNMA; multiclass MVE_VMINNMA - : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vminnma", VTI, vminnm, int_arm_mve_vminnma_predicated, bit_12>; defm MVE_VMINNMAf32 : MVE_VMINNMA; defm MVE_VMINNMAf16 : MVE_VMINNMA; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 5767a74513e8d..bae9d705f5a7a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2115,7 +2115,7 @@ static Value *getUnderLyingObjectForBrevLdIntr(Value *V) { /// true and store the intrinsic information into the IntrinsicInfo that was /// passed to the function. 
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index f4d2a79051c10..cde8b5ba8d8a7 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -145,7 +145,7 @@ class HexagonTargetLowering : public TargetLowering { const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index ba9d0682b26dd..32ea2198f7898 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -8912,7 +8912,7 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { } bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 0c09fb6afd2d1..5277e7e3e74ca 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -78,7 +78,7 @@ class LoongArchTargetLowering : public TargetLowering { Value *NewVal, Value *Mask, AtomicOrdering Ord) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp 
b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8b72b1e1f3a52..5081a093d4c34 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -4077,9 +4077,10 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint( // because we need the information that is only available in the "Value" type // of destination // pointer. In particular, the address space information. -bool NVPTXTargetLowering::getTgtMemIntrinsic( - IntrinsicInfo &Info, const CallInst &I, - MachineFunction &MF, unsigned Intrinsic) const { +bool NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallBase &I, + MachineFunction &MF, + unsigned Intrinsic) const { switch (Intrinsic) { default: return false; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index dd8e49de7aa6a..cb0a1aa5dc892 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -32,7 +32,7 @@ class NVPTXTargetLowering : public TargetLowering { const NVPTXSubtarget &STI); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 1a9310c46cd1d..51212837fbb17 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18495,7 +18495,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { } bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 74af055ed5d30..daae839479c3c 
100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -492,8 +492,7 @@ namespace llvm { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 3d5a55c631301..1e5d0a4297465 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -1569,7 +1569,7 @@ bool RISCVInstructionSelector::selectAddr(MachineInstr &MI, switch (TM.getCodeModel()) { default: { - reportGISelFailure(*MF, *TPC, *MORE, getName(), + reportGISelFailure(*MF, *MORE, getName(), "Unsupported code model for lowering", MI); return false; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 039bd55718e24..ab2652eac3823 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1868,7 +1868,7 @@ bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const { } bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { auto &DL = I.getDataLayout(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 69fcada6494a2..8a55a5634452c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -35,7 +35,7 @@ class RISCVTargetLowering : public TargetLowering { const RISCVSubtarget &getSubtarget() const { return Subtarget; } - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase 
&I, MachineFunction &MF, unsigned Intrinsic) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index ae81d38579c18..0fb44052527f0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -883,10 +883,12 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems, .addUse(NumElementsVReg); }); } else { - assert(ST.isShader() && "Runtime arrays are not allowed in non-shader " - "SPIR-V modules."); - if (!ST.isShader()) + if (!ST.isShader()) { + llvm::reportFatalUsageError( + "Runtime arrays are not allowed in non-shader " + "SPIR-V modules"); return nullptr; + } ArrayType = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { return MIRBuilder.buildInstr(SPIRV::OpTypeRuntimeArray) .addDef(createTypeVReg(MIRBuilder)) diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index 0ba6589c68944..36fa5fa9a70cb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -94,7 +94,7 @@ MVT SPIRVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, } bool SPIRVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { unsigned AlignIdx = 3; diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h index 3d31a116bad4a..5746832c8fd95 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h @@ -48,7 +48,7 @@ class SPIRVTargetLowering : public TargetLowering { EVT VT) const override; MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool 
getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 98cb7aba562c4..e0c527b9b2581 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1060,7 +1060,7 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, } bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index f7052989b3c75..c37970f458e36 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -58,7 +58,7 @@ class WebAssemblyTargetLowering final : public TargetLowering { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td index eb692679f5971..991507e883f28 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -109,6 +109,10 @@ def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>; def : Pat<(shl I64:$lhs, (zext (and I32:$rhs, 63))), (SHL_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>; +def : Pat<(sra I64:$lhs, (zext (and I32:$rhs, 63))), + (SHR_S_I64 I64:$lhs, (I64_EXTEND_U_I32 
I32:$rhs))>; +def : Pat<(srl I64:$lhs, (zext (and I32:$rhs, 63))), + (SHR_U_I64 I64:$lhs, (I64_EXTEND_U_I32 I32:$rhs))>; defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond), (outs), (ins), diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9da121dd9ab87..d46f0f1572f1f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3104,7 +3104,7 @@ static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) { } bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { Info.flags = MachineMemOperand::MONone; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index c5085299716ed..848fe4bf86d2c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1482,7 +1482,7 @@ namespace llvm { /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and stores the intrinsic information into the IntrinsicInfo that was /// passed to the function. - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 001215abcfb26..3af67ff6ac3f5 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6024,33 +6024,34 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, DeadInsts.emplace_back(OperandIsInstr); } -// Trying to hoist the IVInc to loop header if all IVInc users are in -// the loop header. It will help backend to generate post index load/store -// when the latch block is different from loop header block. 
-static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, - const LSRUse &LU, Instruction *IVIncInsertPos, - Loop *L) { +// Determine where to insert the transformed IV increment instruction for this +// fixup. By default this is the default insert position, but if this is a +// postincrement opportunity then we try to insert it in the same block as the +// fixup user instruction, as this is needed for a postincrement instruction to +// be generated. +static Instruction *getFixupInsertPos(const TargetTransformInfo &TTI, + const LSRFixup &Fixup, const LSRUse &LU, + Instruction *IVIncInsertPos, + DominatorTree &DT) { + // Only address uses can be postincremented if (LU.Kind != LSRUse::Address) - return false; - - // For now this code do the conservative optimization, only work for - // the header block. Later we can hoist the IVInc to the block post - // dominate all users. - BasicBlock *LHeader = L->getHeader(); - if (IVIncInsertPos->getParent() == LHeader) - return false; - - if (!Fixup.OperandValToReplace || - any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) { - Instruction *UI = cast(U); - return UI->getParent() != LHeader; - })) - return false; + return IVIncInsertPos; + // Don't try to postincrement if it's not legal Instruction *I = Fixup.UserInst; Type *Ty = I->getType(); - return (isa(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) || - (isa(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)); + if (!(isa(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) && + !(isa(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty))) + return IVIncInsertPos; + + // It's only legal to hoist to the user block if it dominates the default + // insert position. 
+ BasicBlock *HoistBlock = I->getParent(); + BasicBlock *IVIncBlock = IVIncInsertPos->getParent(); + if (!DT.dominates(I, IVIncBlock)) + return IVIncInsertPos; + + return HoistBlock->getTerminator(); } /// Rewrite all the fixup locations with new values, following the chosen @@ -6071,9 +6072,7 @@ void LSRInstance::ImplementSolution( for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { Instruction *InsertPos = - canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L) - ? L->getHeader()->getTerminator() - : IVIncInsertPos; + getFixupInsertPos(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, DT); Rewriter.setIVIncInsertPos(L, InsertPos); Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index e3dceb7677043..57ba35936f3f7 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3150,7 +3150,6 @@ class AllocaSliceRewriter : public InstVisitor { assert(IsSplit || BeginOffset == NewBeginOffset); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; -#ifndef NDEBUG StringRef OldName = OldPtr->getName(); // Skip through the last '.sroa.' component of the name. size_t LastSROAPrefix = OldName.rfind(".sroa."); @@ -3169,17 +3168,10 @@ class AllocaSliceRewriter : public InstVisitor { } // Strip any SROA suffixes as well. OldName = OldName.substr(0, OldName.find(".sroa_")); -#endif return getAdjustedPtr(IRB, DL, &NewAI, APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset), - PointerTy, -#ifndef NDEBUG - Twine(OldName) + "." 
-#else - Twine() -#endif - ); + PointerTy, Twine(OldName) + "."); } /// Compute suitable alignment to access this slice of the *new* diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9b727a7998392..9a94d29ba3307 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5122,8 +5122,18 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { InstructionCost C = getInstructionCost(&I, VF); // Check if we should override the cost. - if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) - C = InstructionCost(ForceTargetInstructionCost); + if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) { + // For interleave groups, use ForceTargetInstructionCost once for the + // whole group. + if (VF.isVector() && getWideningDecision(&I, VF) == CM_Interleave) { + if (getInterleavedAccessGroup(&I)->getInsertPos() == &I) + C = InstructionCost(ForceTargetInstructionCost); + else + C = InstructionCost(0); + } else { + C = InstructionCost(ForceTargetInstructionCost); + } + } BlockCost += C; LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 6491a2ce6813b..422204ff3f292 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -280,7 +280,6 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { if (UI && Ctx.skipCostComputation(UI, VF.isVector())) { RecipeCost = 0; } else { - RecipeCost = computeCost(VF, Ctx); RecipeCost = computeCost(VF, Ctx); if (ForceTargetInstructionCost.getNumOccurrences() > 0 && RecipeCost.isValid()) { diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll index 
ffae5d38a7d8f..9a9a6a7d45931 100644 --- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll +++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll @@ -167,3 +167,236 @@ loop: exit: ret void } + +define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) { +; CHECK-LABEL: 'test_step2_start_outer_add_rec_step_16' +; CHECK-NEXT: Classifying expressions for: @test_step2_start_outer_add_rec_step_16 +; CHECK-NEXT: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] +; CHECK-NEXT: --> {0,+,16}<%outer.header> U: [0,-15) S: [-9223372036854775808,9223372036854775793) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.0 = udiv i64 %iv, 4 +; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.1 = add i64 %iv, 1 +; CHECK-NEXT: --> {{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 +; CHECK-NEXT: --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.2 = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 +; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: 
[0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.3 = add i64 %iv, 3 +; CHECK-NEXT: --> {{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 +; CHECK-NEXT: --> ({{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.4 = add i64 %iv, 4 +; CHECK-NEXT: --> {{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 +; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.5 = add i64 %iv, 5 +; CHECK-NEXT: --> {{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 +; CHECK-NEXT: --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 +; CHECK-NEXT: --> {{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 +; CHECK-NEXT: --> ({{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 +; CHECK-NEXT: --> 
({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 +; CHECK-NEXT: --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.2 = udiv i64 %iv.2, 3 +; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.4 = udiv i64 %iv.4, 3 +; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.5 = udiv i64 %iv.5, 3 +; CHECK-NEXT: --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.next = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %outer.iv.next = add i64 %outer.iv, 16 +; CHECK-NEXT: --> {16,+,16}<%outer.header> U: [0,-15) S: [-9223372036854775808,9223372036854775793) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: Determining loop execution counts for: @test_step2_start_outer_add_rec_step_16 +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable backedge-taken count. 
+; CHECK-NEXT: Loop %outer.header: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Predicated backedge-taken count is (%m /u 16) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i4 (trunc i64 %m to i4) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated constant max backedge-taken count is i64 1152921504606846975 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i4 (trunc i64 %m to i4) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated symbolic max backedge-taken count is (%m /u 16) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i4 (trunc i64 %m to i4) to i64) == 0 +; +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + br label %loop + +loop: + %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] + %div.0 = udiv i64 %iv, 4 + call void @use(i64 %div.0) + %iv.1 = add i64 %iv, 1 + %div.1 = udiv i64 %iv.1, 4 + call void @use(i64 %div.1) + %iv.2 = add i64 %iv, 2 + %div.2 = udiv i64 %iv.2, 4 + call void @use(i64 %div.2) + %iv.3 = add i64 %iv, 3 + %div.3 = udiv i64 %iv.3, 4 + call void @use(i64 %div.3) + %iv.4 = add i64 %iv, 4 + %div.4 = udiv i64 %iv.4, 4 + call void @use(i64 %div.4) + %iv.5 = add i64 %iv, 5 + %div.5 = udiv i64 %iv.5, 4 + call void @use(i64 %div.5) + %iv.neg.1 = add i64 %iv, -1 + %div.neg.1 = udiv i64 %iv.neg.1, 4 + call void @use(i64 %div.neg.1) + %div3.0 = udiv i64 %iv, 3 + call void @use(i64 %div3.0) + %div3.1 = udiv i64 %iv.1,3 + call void @use(i64 %div3.1) + %div3.2 = udiv i64 %iv.2, 3 + call void @use(i64 %div3.2) + %div3.4 = udiv i64 %iv.4, 3 + call void @use(i64 %div3.4) + %div3.5 = udiv i64 %iv.5, 3 + call void @use(i64 %div3.5) + %iv.next = add i64 %iv, 2 + %cond = icmp slt i64 %iv, %n + br i1 %cond, label %loop, label %outer.latch + +outer.latch: + %outer.iv.next = add i64 %outer.iv, 
16 + %outer.ec = icmp eq i64 %outer.iv, %m + br i1 %outer.ec, label %exit, label %outer.header + +exit: + ret void +} + +define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) { +; CHECK-LABEL: 'test_step2_div4_start_outer_add_rec_step_2' +; CHECK-NEXT: Classifying expressions for: @test_step2_div4_start_outer_add_rec_step_2 +; CHECK-NEXT: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] +; CHECK-NEXT: --> {0,+,2}<%outer.header> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.0 = udiv i64 %iv, 4 +; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.1 = add i64 %iv, 1 +; CHECK-NEXT: --> {{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4 +; CHECK-NEXT: --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.2 = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4 +; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: 
Variant } +; CHECK-NEXT: %iv.3 = add i64 %iv, 3 +; CHECK-NEXT: --> {{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4 +; CHECK-NEXT: --> ({{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.4 = add i64 %iv, 4 +; CHECK-NEXT: --> {{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4 +; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.5 = add i64 %iv, 5 +; CHECK-NEXT: --> {{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4 +; CHECK-NEXT: --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1 +; CHECK-NEXT: --> {{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4 +; CHECK-NEXT: --> ({{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3 +; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: 
Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3 +; CHECK-NEXT: --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.2 = udiv i64 %iv.2, 3 +; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.4 = udiv i64 %iv.4, 3 +; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %div3.5 = udiv i64 %iv.5, 3 +; CHECK-NEXT: --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517206) S: [0,6148914691236517206) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.next = add i64 %iv, 2 +; CHECK-NEXT: --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable, %outer.header: Variant } +; CHECK-NEXT: %outer.iv.next = add i64 %outer.iv, 2 +; CHECK-NEXT: --> {2,+,2}<%outer.header> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %outer.header: Computable, %loop: Invariant } +; CHECK-NEXT: Determining loop execution counts for: @test_step2_div4_start_outer_add_rec_step_2 +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Unpredictable constant max backedge-taken count. 
+; CHECK-NEXT: Loop %outer.header: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %outer.header: Predicated backedge-taken count is (%m /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %m to i1) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %m to i1) to i64) == 0 +; CHECK-NEXT: Loop %outer.header: Predicated symbolic max backedge-taken count is (%m /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: Equal predicate: (zext i1 (trunc i64 %m to i1) to i64) == 0 +; +entry: + br label %outer.header + +outer.header: + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + br label %loop + +loop: + %iv = phi i64 [ %outer.iv, %outer.header ], [ %iv.next, %loop ] + %div.0 = udiv i64 %iv, 4 + call void @use(i64 %div.0) + %iv.1 = add i64 %iv, 1 + %div.1 = udiv i64 %iv.1, 4 + call void @use(i64 %div.1) + %iv.2 = add i64 %iv, 2 + %div.2 = udiv i64 %iv.2, 4 + call void @use(i64 %div.2) + %iv.3 = add i64 %iv, 3 + %div.3 = udiv i64 %iv.3, 4 + call void @use(i64 %div.3) + %iv.4 = add i64 %iv, 4 + %div.4 = udiv i64 %iv.4, 4 + call void @use(i64 %div.4) + %iv.5 = add i64 %iv, 5 + %div.5 = udiv i64 %iv.5, 4 + call void @use(i64 %div.5) + %iv.neg.1 = add i64 %iv, -1 + %div.neg.1 = udiv i64 %iv.neg.1, 4 + call void @use(i64 %div.neg.1) + %div3.0 = udiv i64 %iv, 3 + call void @use(i64 %div3.0) + %div3.1 = udiv i64 %iv.1,3 + call void @use(i64 %div3.1) + %div3.2 = udiv i64 %iv.2, 3 + call void @use(i64 %div3.2) + %div3.4 = udiv i64 %iv.4, 3 + call void @use(i64 %div3.4) + %div3.5 = udiv i64 %iv.5, 3 + call void @use(i64 %div3.5) + call void @use(i64 %div.neg.1) + %iv.next = add i64 %iv, 2 + %cond = icmp slt i64 %iv, %n + br i1 %cond, label %loop, label %outer.latch + +outer.latch: + %outer.iv.next = add i64 %outer.iv, 2 + %outer.ec = icmp eq i64 %outer.iv, %m + br i1 
%outer.ec, label %exit, label %outer.header + +exit: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll index 32760caa524ec..146720febf486 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; ; SMULH ; -define @smulh_i8( %a, %b) #0 { +define @smulh_i8( %a, %b) { ; CHECK-LABEL: smulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -19,7 +19,7 @@ define @smulh_i8( %a, %b ret %tr } -define @smulh_i16( %a, %b) #0 { +define @smulh_i16( %a, %b) { ; CHECK-LABEL: smulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -33,7 +33,7 @@ define @smulh_i16( %a, % ret %tr } -define @smulh_i32( %a, %b) #0 { +define @smulh_i32( %a, %b) { ; CHECK-LABEL: smulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -47,7 +47,7 @@ define @smulh_i32( %a, % ret %tr } -define @smulh_i64( %a, %b) #0 { +define @smulh_i64( %a, %b) { ; CHECK-LABEL: smulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -65,7 +65,7 @@ define @smulh_i64( %a, % ; UMULH ; -define @umulh_i8( %a, %b) #0 { +define @umulh_i8( %a, %b) { ; CHECK-LABEL: umulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -79,7 +79,7 @@ define @umulh_i8( %a, %b ret %tr } -define @umulh_i16( %a, %b) #0 { +define @umulh_i16( %a, %b) { ; CHECK-LABEL: umulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -93,7 +93,7 @@ define @umulh_i16( %a, % ret %tr } -define @umulh_i32( %a, %b) #0 { +define @umulh_i32( %a, %b) { ; CHECK-LABEL: umulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -107,7 +107,7 @@ define @umulh_i32( %a, % ret %tr } -define @umulh_i64( %a, %b) #0 { +define @umulh_i64( %a, %b) { ; CHECK-LABEL: umulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ 
-121,4 +121,262 @@ define @umulh_i64( %a, % ret %tr } -attributes #0 = { "target-features"="+sve" } + +; Fixed-length 128bits + +define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = sext <16 x i8> %a to <16 x i16> + %2 = sext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = sext <8 x i16> %a to <8 x i32> + %2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = sext <4 x i32> %a to <4 x i64> + %2 = sext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: smulh x10, x10, x11 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = sext <2 x i64> %a to <2 x i128> + %2 = sext 
<2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = zext <16 x i8> %a to <16 x i16> + %2 = zext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = zext <8 x i16> %a to <8 x i32> + %2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = zext <4 x i32> %a to <4 x i64> + %2 = zext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: umulh x10, x10, x11 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = 
zext <2 x i64> %a to <2 x i128> + %2 = zext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + + + +; Fixed-length 64bits + +define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = sext <8 x i8> %a to <8 x i16> + %2 = sext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = sext <4 x i16> %a to <4 x i32> + %2 = sext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = sext <2 x i32> %a to <2 x i64> + %2 = sext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = sext <1 x i64> %a to <1 x i128> + %2 = sext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, 
splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} + +define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = zext <8 x i8> %a to <8 x i16> + %2 = zext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = zext <4 x i16> %a to <4 x i32> + %2 = zext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = zext <2 x i32> %a to <2 x i64> + %2 = zext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = zext <1 x i64> %a to <1 x i128> + %2 = zext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} + diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll 
b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll index bcf76d5b13d62..d7534712b53a0 100644 --- a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s ; ; SMULH ; -define @smulh_i8( %a, %b) #0 { +define @smulh_i8( %a, %b) { ; CHECK-LABEL: smulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.b, z0.b, z1.b @@ -18,7 +18,7 @@ define @smulh_i8( %a, %b ret %tr } -define @smulh_i16( %a, %b) #0 { +define @smulh_i16( %a, %b) { ; CHECK-LABEL: smulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.h, z0.h, z1.h @@ -31,7 +31,7 @@ define @smulh_i16( %a, % ret %tr } -define @smulh_i32( %a, %b) #0 { +define @smulh_i32( %a, %b) { ; CHECK-LABEL: smulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.s, z0.s, z1.s @@ -44,7 +44,7 @@ define @smulh_i32( %a, % ret %tr } -define @smulh_i64( %a, %b) #0 { +define @smulh_i64( %a, %b) { ; CHECK-LABEL: smulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.d, z0.d, z1.d @@ -61,7 +61,7 @@ define @smulh_i64( %a, % ; UMULH ; -define @umulh_i8( %a, %b) #0 { +define @umulh_i8( %a, %b) { ; CHECK-LABEL: umulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.b, z0.b, z1.b @@ -74,7 +74,7 @@ define @umulh_i8( %a, %b ret %tr } -define @umulh_i16( %a, %b) #0 { +define @umulh_i16( %a, %b) { ; CHECK-LABEL: umulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.h, z0.h, z1.h @@ -87,7 +87,7 @@ define @umulh_i16( %a, % ret %tr } -define @umulh_i32( %a, %b) #0 { +define @umulh_i32( %a, %b) { ; CHECK-LABEL: umulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.s, z0.s, z1.s @@ -100,7 +100,7 @@ define @umulh_i32( %a, % ret %tr } -define @umulh_i64( %a, %b) #0 { +define @umulh_i64( %a, %b) { ; CHECK-LABEL: umulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.d, z0.d, z1.d @@ -113,4 +113,261 @@ define @umulh_i64( %a, % 
ret %tr } -attributes #0 = { "target-features"="+sve2" } + +; Fixed-length 128bits + +define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = sext <16 x i8> %a to <16 x i16> + %2 = sext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = sext <8 x i16> %a to <8 x i32> + %2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = sext <4 x i32> %a to <4 x i64> + %2 = sext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: smulh x10, x10, x11 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = sext <2 x i64> %a to <2 x i128> + %2 = sext <2 x i64> %b to <2 x i128> + %mul = mul <2 
x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret + %1 = zext <16 x i8> %a to <16 x i16> + %2 = zext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret + %1 = zext <8 x i16> %a to <8 x i32> + %2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret + %1 = zext <4 x i32> %a to <4 x i64> + %2 = zext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: umulh x10, x10, x11 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret + %1 = zext <2 x i64> %a to <2 x i128> + %2 = 
zext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + + + +; Fixed-length 64bits + +define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = sext <8 x i8> %a to <8 x i16> + %2 = sext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = sext <4 x i16> %a to <4 x i32> + %2 = sext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = sext <2 x i32> %a to <2 x i64> + %2 = sext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: smulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = sext <1 x i64> %a to <1 x i128> + %2 = sext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr 
to <1 x i64> + ret <1 x i64> %tr +} + +define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-NEXT: ret + %1 = zext <8 x i8> %a to <8 x i16> + %2 = zext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-NEXT: ret + %1 = zext <4 x i16> %a to <4 x i32> + %2 = zext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-NEXT: ret + %1 = zext <2 x i32> %a to <2 x i64> + %2 = zext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %1 = zext <1 x i64> %a to <1 x i128> + %2 = zext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll 
index 405861d791169..9dfd0a47d1e1e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -10,41 +10,75 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inre ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s4, 3 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_and_b32 s0, s4, 3 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, s0 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_and_b32 s0, s4, 3 -; GFX7-NEXT: s_lshl_b32 s0, s0, 4 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_and_b32 s2, s4, 3 +; GFX7-NEXT: s_lshl_b32 s4, s2, 4 +; GFX7-NEXT: s_mov_b32 s5, 0 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, s5 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], 
s[0:3], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s4, 3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4 -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s4, 3 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_lshl_b32 s0, s0, 4 -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], s0 offset:0x0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 %idx @@ -281,22 +315,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre } define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(ptr addrspace(4) inreg %ptr) { -; GCN-LABEL: extractelement_sgpr_v4i128_idx0: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: ; 
return to shader part epilog +; GFX9-LABEL: extractelement_sgpr_v4i128_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_sgpr_v4i128_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_sgpr_v4i128_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 +; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx0: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx0: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 0 @@ -306,32 +381,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(ptr addrspace(4) inreg %p define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, 16 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x4 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 
v[0:3], off, s[0:3], 0 offset:16 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x10 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx1: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x10 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] offset:16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 1 @@ -341,32 +447,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(ptr addrspace(4) inreg %p define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, 
v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, 32 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x8 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:32 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x20 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:32 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx2: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x20 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] offset:32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; 
GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 2 @@ -376,32 +513,63 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(ptr addrspace(4) inreg %p define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s0, s2, 48 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0xc -; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:48 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: 
v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 ; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x30 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] offset:48 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_idx3: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x30 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] offset:48 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 3 @@ -585,3 +753,5 @@ define i128 @extractelement_vgpr_v4i128_idx3(ptr addrspace(1) %ptr) { %element = extractelement <4 x i128> %vector, i32 3 ret i128 %element } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index 9539ec465e02f..91ee7642790fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -11,28 +11,40 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr ; GFX8V4-LABEL: addrspacecast: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x40 ; GFX8V4-NEXT: s_add_i32 s12, s12, s17 ; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX8V4-NEXT: s_add_u32 s2, s6, 0x44 +; GFX8V4-NEXT: s_addc_u32 s3, s7, 0 +; GFX8V4-NEXT: v_mov_b32_e32 v0, s2 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_mov_b32 s4, s0 -; GFX8V4-NEXT: s_mov_b32 s5, s3 ; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V4-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 -; GFX8V4-NEXT: s_mov_b32 s6, s1 -; GFX8V4-NEXT: s_mov_b32 s7, s2 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s3 +; GFX8V4-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8V4-NEXT: s_and_b32 s4, 1, s2 +; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX8V4-NEXT: s_add_u32 s2, s6, 64 +; GFX8V4-NEXT: flat_load_dword v3, v[0:1] +; GFX8V4-NEXT: s_addc_u32 s3, s7, 0 +; GFX8V4-NEXT: v_mov_b32_e32 v0, s2 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s3 +; GFX8V4-NEXT: flat_load_dword v4, v[0:1] ; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1 -; GFX8V4-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0 -; GFX8V4-NEXT: v_mov_b32_e32 v2, 1 -; GFX8V4-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V4-NEXT: flat_store_dword v[0:1], v2 -; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V4-NEXT: v_mov_b32_e32 v2, 2 +; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8V4-NEXT: 
s_and_b32 s0, 1, s0 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V4-NEXT: flat_store_dword v[0:1], v2 +; GFX8V4-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8V4-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX8V4-NEXT: v_mov_b32_e32 v5, 1 +; GFX8V4-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX8V4-NEXT: v_cndmask_b32_e64 v2, 0, v1, s[0:1] +; GFX8V4-NEXT: s_waitcnt vmcnt(1) +; GFX8V4-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX8V4-NEXT: flat_store_dword v[0:1], v5 +; GFX8V4-NEXT: s_waitcnt vmcnt(0) +; GFX8V4-NEXT: v_mov_b32_e32 v0, 2 +; GFX8V4-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; GFX8V4-NEXT: flat_store_dword v[2:3], v0 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: s_endpgm ; @@ -124,13 +136,15 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 { ; GFX8V4-LABEL: llvm_amdgcn_is_shared: ; GFX8V4: ; %bb.0: -; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x40 -; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8V4-NEXT: s_add_u32 s0, s6, 64 +; GFX8V4-NEXT: s_addc_u32 s1, s7, 0 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s1 +; GFX8V4-NEXT: flat_load_dword v0, v[0:1] +; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; GFX8V4-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8V4-NEXT: v_cmp_eq_u32_e32 vcc, s1, v0 +; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX8V4-NEXT: flat_store_dword v[0:1], v0 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: s_endpgm @@ -180,13 +194,15 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 { define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 { ; GFX8V4-LABEL: llvm_amdgcn_is_private: ; GFX8V4: ; %bb.0: -; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x44 -; GFX8V4-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8V4-NEXT: s_add_u32 s0, s6, 0x44 +; GFX8V4-NEXT: s_addc_u32 s1, s7, 0 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0 +; GFX8V4-NEXT: v_mov_b32_e32 v1, s1 +; GFX8V4-NEXT: flat_load_dword v0, v[0:1] +; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; GFX8V4-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8V4-NEXT: v_cmp_eq_u32_e32 vcc, s1, v0 +; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX8V4-NEXT: flat_store_dword v[0:1], v0 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir index 999ea42910d92..e35927e8bf00d 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir @@ -1,7 +1,9 @@ -# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s -# CHECK: Illegal virtual register for instruction -# CHECK: Expected a VGPR_32 register, but got a AGPR_32 register - +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s | FileCheck %s +# CHECK: bb.1: +# CHECK: dead %{{[0-9]+}}:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, 4, 4, %{{[0-9]+}}, %[[REG:[0-9]+]], 4, 0, implicit $mode, implicit $exec +# CHECK: %{{[0-9]+}}:agpr_32 = IMPLICIT_DEF +# CHECK: %[[REG]]:vgpr_32 = COPY %{{[0-9]+}} + # Test for issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns scale operand # in vgpr_32 register to agpr_32, not permitted by instruction format. 
--- diff --git a/llvm/test/CodeGen/SPIRV/zero-length-array.ll b/llvm/test/CodeGen/SPIRV/zero-length-array.ll index 5fd94d25dfd87..cb34529ebfecd 100644 --- a/llvm/test/CodeGen/SPIRV/zero-length-array.ll +++ b/llvm/test/CodeGen/SPIRV/zero-length-array.ll @@ -1,7 +1,9 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute < %s | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; Nothing is generated, but compilation doesn't crash. +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown < %s 2>&1 | FileCheck -check-prefix=CHECK-ERR %s + +; For compute, nothing is generated, but compilation doesn't crash. ; CHECK: OpName %[[#FOO:]] "foo" ; CHECK: OpName %[[#RTM:]] "reg2mem alloca point" ; CHECK: %[[#INT:]] = OpTypeInt 32 0 @@ -11,6 +13,10 @@ ; CHECK-NEXT: OpReturn ; CHECK-NEXT: OpFunctionEnd + +; For non-compute, error. 
+; CHECK-ERR: LLVM ERROR: Runtime arrays are not allowed in non-shader SPIR-V modules + define spir_func void @foo() { entry: %i = alloca [0 x i32], align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index d076cb00ad7e0..706a7c34c3df5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -66,9 +66,8 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r6, ne ; CHECK-NEXT: strb r6, [r5] ; CHECK-NEXT: add.w r2, r2, #792 -; CHECK-NEXT: ldrb r6, [r3] +; CHECK-NEXT: ldrb r6, [r3], #2 ; CHECK-NEXT: adds r4, #8 -; CHECK-NEXT: adds r3, #2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r6, r1 @@ -101,8 +100,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r6, ne ; CHECK-NEXT: adds r4, #8 ; CHECK-NEXT: strb r6, [r5] -; CHECK-NEXT: ldrb r6, [r3] -; CHECK-NEXT: adds r3, #2 +; CHECK-NEXT: ldrb r6, [r3], #2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r6, r1 @@ -134,8 +132,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r4, ne ; CHECK-NEXT: add.w r11, r11, #8 ; CHECK-NEXT: strb r4, [r5] -; CHECK-NEXT: ldrb r4, [r3] -; CHECK-NEXT: adds r3, #2 +; CHECK-NEXT: ldrb r4, [r3], #2 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r4, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll index d19844c683a8f..9e42f3984c24d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll @@ -240,4 +240,93 @@ entry: ret <8 x half> %0 } -attributes #0 = { strictfp } \ No newline at end of file + +define arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmq_f16: 
+; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vminnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call 
<4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll index 368f30fd5d7ed..8f90fa68e8fbd 100644 --- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll +++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll @@ -46,6 +46,21 @@ define i32 @sra_i32(i32 %v, i32 %x) { ret i32 %a } +define i64 @sra_i64_zext(i64 %v, i32 %x) { +; CHECK-LABEL: sra_i64_zext: +; CHECK: .functype sra_i64_zext (i64, i32) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.extend_i32_u +; CHECK-NEXT: i64.shr_s +; CHECK-NEXT: # fallthrough-return + %m = and i32 %x, 63 + %z = zext i32 %m to i64 + %a = ashr i64 %v, %z + ret i64 %a +} + define i32 @srl_i32(i32 %v, i32 %x) { ; CHECK-LABEL: srl_i32: ; CHECK: .functype srl_i32 (i32, i32) -> (i32) @@ -59,6 +74,21 @@ define i32 @srl_i32(i32 %v, i32 %x) { ret i32 %a } +define i64 @srl_i64_zext(i64 %v, i32 %x) { +; CHECK-LABEL: srl_i64_zext: +; 
CHECK: .functype srl_i64_zext (i64, i32) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.extend_i32_u +; CHECK-NEXT: i64.shr_u +; CHECK-NEXT: # fallthrough-return + %m = and i32 %x, 63 + %z = zext i32 %m to i64 + %a = lshr i64 %v, %z + ret i64 %a +} + define i64 @shl_i64(i64 %v, i64 %x) { ; CHECK-LABEL: shl_i64: ; CHECK: .functype shl_i64 (i64, i64) -> (i64) diff --git a/llvm/test/CodeGen/X86/combine-fcmp.ll b/llvm/test/CodeGen/X86/combine-fcmp.ll new file mode 100644 index 0000000000000..f2666f69949b7 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-fcmp.ll @@ -0,0 +1,330 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX1OR2,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX1OR2,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 + +define i4 @concat_fcmp_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) { +; SSE-LABEL: concat_fcmp_v4f64_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: xorpd %xmm2, %xmm2 +; SSE-NEXT: xorpd %xmm3, %xmm3 +; SSE-NEXT: cmpltpd %xmm0, %xmm3 +; SSE-NEXT: cmpltpd %xmm1, %xmm2 +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2] +; SSE-NEXT: movmskps %xmm3, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v4f64_v2f64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm2, %xmm0 +; AVX1OR2-NEXT: vcmpltpd %xmm1, %xmm2, %xmm1 +; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: # kill: 
def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v4f64_v2f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vcmpltpd %xmm0, %xmm2, %k0 +; AVX512-NEXT: vcmpltpd %xmm1, %xmm2, %k1 +; AVX512-NEXT: kshiftlb $2, %k1, %k1 +; AVX512-NEXT: korw %k1, %k0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %v0 = fcmp ogt <2 x double> %a0, zeroinitializer + %v1 = fcmp ogt <2 x double> %a1, zeroinitializer + %v = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> + %r = bitcast <4 x i1> %v to i4 + ret i4 %r +} + +define i8 @concat_fcmp_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: concat_fcmp_v8f32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm2, %xmm2 +; SSE-NEXT: cmpeqps %xmm2, %xmm0 +; SSE-NEXT: cmpeqps %xmm2, %xmm1 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v8f32_v4f32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpeqps %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v8f32_v4f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpeqps %ymm1, %ymm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp oeq <4 x float> %a0, zeroinitializer + %v1 = fcmp oeq <4 x float> %a1, zeroinitializer + %v = shufflevector <4 x i1> %v0, <4 x i1> %v1, 
<8 x i32> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i8 @concat_fcmp_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) { +; SSE-LABEL: concat_fcmp_v8f64_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: xorpd %xmm4, %xmm4 +; SSE-NEXT: cmpltpd %xmm4, %xmm0 +; SSE-NEXT: cmpltpd %xmm4, %xmm1 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: cmpltpd %xmm4, %xmm2 +; SSE-NEXT: cmpltpd %xmm4, %xmm3 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v8f64_v2f64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorpd %xmm4, %xmm4, %xmm4 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm2, %xmm1 +; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm3, %xmm2 +; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v8f64_v2f64: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpltpd %zmm1, %zmm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp olt 
<2 x double> %a0, zeroinitializer + %v1 = fcmp olt <2 x double> %a1, zeroinitializer + %v2 = fcmp olt <2 x double> %a2, zeroinitializer + %v3 = fcmp olt <2 x double> %a3, zeroinitializer + %v01 = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> + %v23 = shufflevector <2 x i1> %v2, <2 x i1> %v3, <4 x i32> + %v = shufflevector <4 x i1> %v01, <4 x i1> %v23, <8 x i32> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i16 @concat_fcmp_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) { +; SSE-LABEL: concat_fcmp_v16f32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm4, %xmm4 +; SSE-NEXT: xorps %xmm5, %xmm5 +; SSE-NEXT: cmpleps %xmm0, %xmm5 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cmpleps %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm0, %xmm5 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cmpleps %xmm2, %xmm0 +; SSE-NEXT: cmpleps %xmm3, %xmm4 +; SSE-NEXT: packssdw %xmm4, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm5 +; SSE-NEXT: pmovmskb %xmm5, %eax +; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v16f32_v4f32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorps %xmm4, %xmm4, %xmm4 +; AVX1OR2-NEXT: vcmpleps %xmm0, %xmm4, %xmm0 +; AVX1OR2-NEXT: vcmpleps %xmm1, %xmm4, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpleps %xmm2, %xmm4, %xmm1 +; AVX1OR2-NEXT: vcmpleps %xmm3, %xmm4, %xmm2 +; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $ax killed $ax killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v16f32_v4f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; 
AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp oge <4 x float> %a0, zeroinitializer + %v1 = fcmp oge <4 x float> %a1, zeroinitializer + %v2 = fcmp oge <4 x float> %a2, zeroinitializer + %v3 = fcmp oge <4 x float> %a3, zeroinitializer + %v01 = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> + %v23 = shufflevector <4 x i1> %v2, <4 x i1> %v3, <8 x i32> + %v = shufflevector <8 x i1> %v01, <8 x i1> %v23, <16 x i32> + %r = bitcast <16 x i1> %v to i16 + ret i16 %r +} + +define i8 @concat_fcmp_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1) { +; SSE-LABEL: concat_fcmp_v8f64_v4f64: +; SSE: # %bb.0: +; SSE-NEXT: xorpd %xmm4, %xmm4 +; SSE-NEXT: movapd %xmm1, %xmm5 +; SSE-NEXT: cmpneqpd %xmm4, %xmm5 +; SSE-NEXT: cmpordpd %xmm4, %xmm1 +; SSE-NEXT: andpd %xmm5, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE-NEXT: movapd %xmm0, %xmm5 +; SSE-NEXT: cmpneqpd %xmm4, %xmm5 +; SSE-NEXT: cmpordpd %xmm4, %xmm0 +; SSE-NEXT: andpd %xmm5, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: movapd %xmm3, %xmm1 +; SSE-NEXT: cmpneqpd %xmm4, %xmm1 +; SSE-NEXT: cmpordpd %xmm4, %xmm3 +; SSE-NEXT: andpd %xmm1, %xmm3 +; SSE-NEXT: movapd %xmm2, %xmm1 +; SSE-NEXT: cmpneqpd %xmm4, %xmm1 +; SSE-NEXT: cmpordpd %xmm4, %xmm2 +; SSE-NEXT: andpd %xmm1, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm2, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1-LABEL: concat_fcmp_v8f64_v4f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; 
AVX1-NEXT: vcmpneq_oqpd %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] +; AVX1-NEXT: vcmpneq_oqpd %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_fcmp_v8f64_v4f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vcmpneq_oqpd %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] +; AVX2-NEXT: vcmpneq_oqpd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v8f64_v4f64: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp one <4 x double> %a0, zeroinitializer + %v1 = fcmp one <4 x double> %a1, zeroinitializer + %v = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i16 @concat_fcmp_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1) { +; SSE-LABEL: concat_fcmp_v16f32_v8f32: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm4, %xmm4 +; SSE-NEXT: 
cmpleps %xmm4, %xmm1 +; SSE-NEXT: cmpleps %xmm4, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: cmpleps %xmm4, %xmm3 +; SSE-NEXT: cmpleps %xmm4, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_fcmp_v16f32_v8f32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vcmpleps %ymm2, %ymm0, %ymm0 +; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1OR2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 +; AVX1OR2-NEXT: vcmpleps %ymm2, %ymm1, %ymm1 +; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $ax killed $ax killed $eax +; AVX1OR2-NEXT: vzeroupper +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_fcmp_v16f32_v8f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vcmpleps %zmm1, %zmm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v0 = fcmp ole <8 x float> %a0, zeroinitializer + %v1 = fcmp ole <8 x float> %a1, zeroinitializer + %v = shufflevector <8 x i1> %v0, <8 x i1> %v1, <16 x i32> + %r = bitcast <16 x i1> %v to i16 + ret i16 %r +} diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index a9ebdaa397015..d82dc412f5e36 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -2667,15 +2667,10 @@ define [4 x float] @constant_aggregate_zero() { } define @scalable_splat_pnorm() { -; CHECK-CV: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-CV-LABEL: 
define noundef @scalable_splat_pnorm -; CHECK-CV-SAME: () #[[ATTR3]] { -; CHECK-CV-NEXT: ret splat (float 1.000000e+00) -; -; CHECK-CI: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-CI-LABEL: define noundef nofpclass(nan inf zero sub nnorm) @scalable_splat_pnorm -; CHECK-CI-SAME: () #[[ATTR3]] { -; CHECK-CI-NEXT: ret splat (float 1.000000e+00) +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define noundef nofpclass(nan inf zero sub nnorm) @scalable_splat_pnorm +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: ret splat (float 1.000000e+00) ; ret splat (float 1.0) } @@ -2689,6 +2684,19 @@ define @scalable_splat_zero() { ret zeroinitializer } +define @scalable_splat_nnan(float nofpclass(nan) %x) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan) @scalable_splat_nnan +; CHECK-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[HEAD:%.*]] = insertelement poison, float [[X]], i32 0 +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector [[HEAD]], poison, zeroinitializer +; CHECK-NEXT: ret [[SPLAT]] +; + %head = insertelement poison, float %x, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + ; Verify we do not derive 'nofpclass(inf zero sub norm)' for the argument __x. ; See https://github.com/llvm/llvm-project/issues/78507 @@ -2989,5 +2997,7 @@ attributes #5 = { "denormal-fp-math"="ieee,positive-zero" } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; CGSCC-CI: {{.*}} ; CGSCC-CV: {{.*}} +; CHECK-CI: {{.*}} +; CHECK-CV: {{.*}} ; TUNIT-CI: {{.*}} ; TUNIT-CV: {{.*}} diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll index 257816650017a..1a6400997f080 100644 --- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll @@ -97,7 +97,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32 -; CHECK-NEXT: br i1 %arg, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.us.preheader: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1) ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll index 2003b1a72206d..acba88ef5a54d 100644 --- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll +++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -4,13 +4,12 @@ define i32 @remove_loop(i32 %size) #0 { ; CHECK-V8M-LABEL: @remove_loop( -; CHECK-V8M-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-V8M-NEXT: entry: -; CHECK-V8M-NEXT: br label %[[WHILE_COND:.*]] +; CHECK-V8M-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8M: while.cond: -; CHECK-V8M-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]] +; CHECK-V8M-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8M: while.end: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31 +; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 ; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 
[[SIZE]], i32 31) ; CHECK-V8M-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] ; CHECK-V8M-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 @@ -19,13 +18,12 @@ define i32 @remove_loop(i32 %size) #0 { ; CHECK-V8M-NEXT: ret i32 [[TMP4]] ; ; CHECK-V8A-LABEL: @remove_loop( -; CHECK-V8A-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-V8A-NEXT: entry: -; CHECK-V8A-NEXT: br label %[[WHILE_COND:.*]] +; CHECK-V8A-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8A: while.cond: -; CHECK-V8A-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]] +; CHECK-V8A-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8A: while.end: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31 +; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 ; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) ; CHECK-V8A-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] ; CHECK-V8A-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 diff --git a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll index cc0f2587266a2..45bb66d1d7d80 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll @@ -16,7 +16,7 @@ declare void @use(i64 %x) define void @loop_0(ptr %a, i1 %arg) { ; CHECK-LABEL: @loop_0( ; CHECK-NEXT: Prologue: -; CHECK-NEXT: br i1 %arg, label [[B18_PREHEADER:%.*]], label [[B6:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[B18_PREHEADER:%.*]], label [[B6:%.*]] ; CHECK: B18.preheader: ; CHECK-NEXT: br label [[B18:%.*]] ; CHECK: B18: @@ -70,7 +70,7 @@ exit24: ; preds = %B18 define void @loop_0_dead(ptr %a, i1 %arg) { ; CHECK-LABEL: @loop_0_dead( ; CHECK-NEXT: Prologue: -; CHECK-NEXT: br i1 %arg, label [[B18_PREHEADER:%.*]], label [[B6:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[B18_PREHEADER:%.*]], label [[B6:%.*]] ; CHECK: B18.preheader: ; CHECK-NEXT: br label [[B18:%.*]] ; CHECK: B18: diff --git 
a/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll index 17b7b9d40b07a..5ecb684b8b2f5 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr59615.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[VAR:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0:![0-9]+]], !invariant.load !1, !noundef !1 +; CHECK-NEXT: [[VAR:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0:![0-9]+]], !invariant.load [[META1:![0-9]+]], !noundef [[META1]] ; CHECK-NEXT: [[VAR2:%.*]] = icmp eq i32 [[VAR]], 0 ; CHECK-NEXT: br i1 [[VAR2]], label [[BB18:%.*]], label [[BB19:%.*]] ; CHECK: bb3: @@ -16,7 +16,7 @@ define void @test() { ; CHECK: bb7: ; CHECK-NEXT: ret void ; CHECK: bb8: -; CHECK-NEXT: [[VAR9:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0]], !invariant.load !1, !noundef !1 +; CHECK-NEXT: [[VAR9:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8, !range [[RNG0]], !invariant.load [[META1]], !noundef [[META1]] ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[VAR9]] to i64 ; CHECK-NEXT: [[VAR10:%.*]] = icmp ult i64 [[INDVARS_IV]], [[TMP0]] ; CHECK-NEXT: br i1 [[VAR10]], label [[BB12]], label [[BB11:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll b/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll index 121eec75c1b3c..4502416a19477 100644 --- a/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll +++ b/llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll @@ -51,7 +51,7 @@ bb2: ; preds = %bb2, %bb1 !8 = !DILocation(line: 1, column: 1, scope: !5) ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) -; CHECK: [[META1]] = !DIFile(filename: "llvm/test/Transforms/IndVarSimplify/debugloc-rem-subst.ll", directory: {{.*}}) +; CHECK: [[META1]] = !DIFile(filename: "{{.*}}debugloc-rem-subst.ll", directory: {{.*}}) ; CHECK: [[DBG5]] = distinct !DISubprogram(name: "widget", linkageName: "widget", scope: null, file: [[META1]], line: 1, type: [[META6:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]]) ; CHECK: [[META6]] = !DISubroutineType(types: [[META7:![0-9]+]]) ; CHECK: [[META7]] = !{} diff --git a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll index b4cd98cd234f0..6a809fe45d660 100644 --- a/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll +++ b/llvm/test/Transforms/IndVarSimplify/dont-recompute.ll @@ -211,7 +211,7 @@ define void @test6(i32 %m, ptr %p) nounwind uwtable { ; CHECK-NEXT: [[ADD]] = add i32 [[A_05]], [[M:%.*]] ; CHECK-NEXT: [[SOFT_USE:%.*]] = add i32 [[ADD]], 123 ; CHECK-NEXT: [[PIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[ADD]] -; CHECK-NEXT: store i32 [[SOFT_USE]], ptr [[PIDX]] +; CHECK-NEXT: store i32 [[SOFT_USE]], ptr [[PIDX]], align 4 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 186 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll index b24650830778f..488aed2ba0211 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll @@ -221,6 +221,220 @@ exit: } define void @many_exits([100 x i64] %len) { +; CHECK-LABEL: @many_exits( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN1:%.*]] = 
extractvalue [100 x i64] [[LEN:%.*]], 1 +; CHECK-NEXT: [[LEN2:%.*]] = extractvalue [100 x i64] [[LEN]], 2 +; CHECK-NEXT: [[LEN3:%.*]] = extractvalue [100 x i64] [[LEN]], 3 +; CHECK-NEXT: [[LEN4:%.*]] = extractvalue [100 x i64] [[LEN]], 4 +; CHECK-NEXT: [[LEN5:%.*]] = extractvalue [100 x i64] [[LEN]], 5 +; CHECK-NEXT: [[LEN6:%.*]] = extractvalue [100 x i64] [[LEN]], 6 +; CHECK-NEXT: [[LEN7:%.*]] = extractvalue [100 x i64] [[LEN]], 7 +; CHECK-NEXT: [[LEN8:%.*]] = extractvalue [100 x i64] [[LEN]], 8 +; CHECK-NEXT: [[LEN9:%.*]] = extractvalue [100 x i64] [[LEN]], 9 +; CHECK-NEXT: [[LEN10:%.*]] = extractvalue [100 x i64] [[LEN]], 10 +; CHECK-NEXT: [[LEN11:%.*]] = extractvalue [100 x i64] [[LEN]], 11 +; CHECK-NEXT: [[LEN12:%.*]] = extractvalue [100 x i64] [[LEN]], 12 +; CHECK-NEXT: [[LEN13:%.*]] = extractvalue [100 x i64] [[LEN]], 13 +; CHECK-NEXT: [[LEN14:%.*]] = extractvalue [100 x i64] [[LEN]], 14 +; CHECK-NEXT: [[LEN15:%.*]] = extractvalue [100 x i64] [[LEN]], 15 +; CHECK-NEXT: [[LEN16:%.*]] = extractvalue [100 x i64] [[LEN]], 16 +; CHECK-NEXT: [[LEN17:%.*]] = extractvalue [100 x i64] [[LEN]], 17 +; CHECK-NEXT: [[LEN18:%.*]] = extractvalue [100 x i64] [[LEN]], 18 +; CHECK-NEXT: [[LEN19:%.*]] = extractvalue [100 x i64] [[LEN]], 19 +; CHECK-NEXT: [[LEN20:%.*]] = extractvalue [100 x i64] [[LEN]], 20 +; CHECK-NEXT: [[LEN21:%.*]] = extractvalue [100 x i64] [[LEN]], 21 +; CHECK-NEXT: [[LEN22:%.*]] = extractvalue [100 x i64] [[LEN]], 22 +; CHECK-NEXT: [[LEN23:%.*]] = extractvalue [100 x i64] [[LEN]], 23 +; CHECK-NEXT: [[LEN24:%.*]] = extractvalue [100 x i64] [[LEN]], 24 +; CHECK-NEXT: [[LEN25:%.*]] = extractvalue [100 x i64] [[LEN]], 25 +; CHECK-NEXT: [[LEN26:%.*]] = extractvalue [100 x i64] [[LEN]], 26 +; CHECK-NEXT: [[LEN27:%.*]] = extractvalue [100 x i64] [[LEN]], 27 +; CHECK-NEXT: [[LEN28:%.*]] = extractvalue [100 x i64] [[LEN]], 28 +; CHECK-NEXT: [[LEN29:%.*]] = extractvalue [100 x i64] [[LEN]], 29 +; CHECK-NEXT: [[LEN30:%.*]] = extractvalue [100 x i64] [[LEN]], 30 +; 
CHECK-NEXT: [[LEN31:%.*]] = extractvalue [100 x i64] [[LEN]], 31 +; CHECK-NEXT: [[LEN32:%.*]] = extractvalue [100 x i64] [[LEN]], 32 +; CHECK-NEXT: [[LEN33:%.*]] = extractvalue [100 x i64] [[LEN]], 33 +; CHECK-NEXT: [[LEN34:%.*]] = extractvalue [100 x i64] [[LEN]], 34 +; CHECK-NEXT: [[LEN35:%.*]] = extractvalue [100 x i64] [[LEN]], 35 +; CHECK-NEXT: [[LEN36:%.*]] = extractvalue [100 x i64] [[LEN]], 36 +; CHECK-NEXT: [[LEN37:%.*]] = extractvalue [100 x i64] [[LEN]], 37 +; CHECK-NEXT: [[LEN38:%.*]] = extractvalue [100 x i64] [[LEN]], 38 +; CHECK-NEXT: [[LEN39:%.*]] = extractvalue [100 x i64] [[LEN]], 39 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LEN0:%.*]] = extractvalue [100 x i64] [[LEN]], 0 +; CHECK-NEXT: [[EARLY0:%.*]] = icmp eq i64 [[IV]], [[LEN0]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY0]], label [[EXIT:%.*]], label [[CONT0:%.*]] +; CHECK: cont0: +; CHECK-NEXT: [[EARLY1:%.*]] = icmp eq i64 [[IV]], [[LEN1]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY1]], label [[EXIT]], label [[CONT1:%.*]] +; CHECK: cont1: +; CHECK-NEXT: [[EARLY2:%.*]] = icmp eq i64 [[IV]], [[LEN2]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY2]], label [[EXIT]], label [[CONT2:%.*]] +; CHECK: cont2: +; CHECK-NEXT: [[EARLY3:%.*]] = icmp eq i64 [[IV]], [[LEN3]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY3]], label [[EXIT]], label [[CONT3:%.*]] +; CHECK: cont3: +; CHECK-NEXT: [[EARLY4:%.*]] = icmp eq i64 [[IV]], [[LEN4]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY4]], label [[EXIT]], label [[CONT4:%.*]] +; CHECK: cont4: +; CHECK-NEXT: [[EARLY5:%.*]] = icmp eq i64 [[IV]], [[LEN5]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY5]], label [[EXIT]], label [[CONT5:%.*]] +; CHECK: cont5: +; CHECK-NEXT: [[EARLY6:%.*]] = icmp eq i64 
[[IV]], [[LEN6]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY6]], label [[EXIT]], label [[CONT6:%.*]] +; CHECK: cont6: +; CHECK-NEXT: [[EARLY7:%.*]] = icmp eq i64 [[IV]], [[LEN7]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY7]], label [[EXIT]], label [[CONT7:%.*]] +; CHECK: cont7: +; CHECK-NEXT: [[EARLY8:%.*]] = icmp eq i64 [[IV]], [[LEN8]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY8]], label [[EXIT]], label [[CONT8:%.*]] +; CHECK: cont8: +; CHECK-NEXT: [[EARLY9:%.*]] = icmp eq i64 [[IV]], [[LEN9]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY9]], label [[EXIT]], label [[CONT9:%.*]] +; CHECK: cont9: +; CHECK-NEXT: [[EARLY10:%.*]] = icmp eq i64 [[IV]], [[LEN10]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY10]], label [[EXIT]], label [[CONT10:%.*]] +; CHECK: cont10: +; CHECK-NEXT: [[EARLY11:%.*]] = icmp eq i64 [[IV]], [[LEN11]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY11]], label [[EXIT]], label [[CONT11:%.*]] +; CHECK: cont11: +; CHECK-NEXT: [[EARLY12:%.*]] = icmp eq i64 [[IV]], [[LEN12]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY12]], label [[EXIT]], label [[CONT12:%.*]] +; CHECK: cont12: +; CHECK-NEXT: [[EARLY13:%.*]] = icmp eq i64 [[IV]], [[LEN13]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY13]], label [[EXIT]], label [[CONT13:%.*]] +; CHECK: cont13: +; CHECK-NEXT: [[EARLY14:%.*]] = icmp eq i64 [[IV]], [[LEN14]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY14]], label [[EXIT]], label [[CONT14:%.*]] +; CHECK: cont14: +; CHECK-NEXT: [[EARLY15:%.*]] = icmp eq i64 [[IV]], [[LEN15]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY15]], label [[EXIT]], label [[CONT15:%.*]] +; CHECK: cont15: +; CHECK-NEXT: [[EARLY16:%.*]] = icmp eq i64 [[IV]], [[LEN16]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY16]], label 
[[EXIT]], label [[CONT16:%.*]] +; CHECK: cont16: +; CHECK-NEXT: [[EARLY17:%.*]] = icmp eq i64 [[IV]], [[LEN17]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY17]], label [[EXIT]], label [[CONT17:%.*]] +; CHECK: cont17: +; CHECK-NEXT: [[EARLY18:%.*]] = icmp eq i64 [[IV]], [[LEN18]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY18]], label [[EXIT]], label [[CONT18:%.*]] +; CHECK: cont18: +; CHECK-NEXT: [[EARLY19:%.*]] = icmp eq i64 [[IV]], [[LEN19]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY19]], label [[EXIT]], label [[CONT19:%.*]] +; CHECK: cont19: +; CHECK-NEXT: [[EARLY20:%.*]] = icmp eq i64 [[IV]], [[LEN20]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY20]], label [[EXIT]], label [[CONT20:%.*]] +; CHECK: cont20: +; CHECK-NEXT: [[EARLY21:%.*]] = icmp eq i64 [[IV]], [[LEN21]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY21]], label [[EXIT]], label [[CONT21:%.*]] +; CHECK: cont21: +; CHECK-NEXT: [[EARLY22:%.*]] = icmp eq i64 [[IV]], [[LEN22]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY22]], label [[EXIT]], label [[CONT22:%.*]] +; CHECK: cont22: +; CHECK-NEXT: [[EARLY23:%.*]] = icmp eq i64 [[IV]], [[LEN23]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY23]], label [[EXIT]], label [[CONT23:%.*]] +; CHECK: cont23: +; CHECK-NEXT: [[EARLY24:%.*]] = icmp eq i64 [[IV]], [[LEN24]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY24]], label [[EXIT]], label [[CONT24:%.*]] +; CHECK: cont24: +; CHECK-NEXT: [[EARLY25:%.*]] = icmp eq i64 [[IV]], [[LEN25]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY25]], label [[EXIT]], label [[CONT25:%.*]] +; CHECK: cont25: +; CHECK-NEXT: [[EARLY26:%.*]] = icmp eq i64 [[IV]], [[LEN26]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY26]], label [[EXIT]], label [[CONT26:%.*]] +; CHECK: cont26: +; CHECK-NEXT: [[EARLY27:%.*]] 
= icmp eq i64 [[IV]], [[LEN27]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY27]], label [[EXIT]], label [[CONT27:%.*]] +; CHECK: cont27: +; CHECK-NEXT: [[EARLY28:%.*]] = icmp eq i64 [[IV]], [[LEN28]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY28]], label [[EXIT]], label [[CONT28:%.*]] +; CHECK: cont28: +; CHECK-NEXT: [[EARLY29:%.*]] = icmp eq i64 [[IV]], [[LEN29]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY29]], label [[EXIT]], label [[CONT29:%.*]] +; CHECK: cont29: +; CHECK-NEXT: [[EARLY30:%.*]] = icmp eq i64 [[IV]], [[LEN30]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY30]], label [[EXIT]], label [[CONT30:%.*]] +; CHECK: cont30: +; CHECK-NEXT: [[EARLY31:%.*]] = icmp eq i64 [[IV]], [[LEN31]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY31]], label [[EXIT]], label [[CONT31:%.*]] +; CHECK: cont31: +; CHECK-NEXT: [[EARLY32:%.*]] = icmp eq i64 [[IV]], [[LEN32]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY32]], label [[EXIT]], label [[CONT32:%.*]] +; CHECK: cont32: +; CHECK-NEXT: [[EARLY33:%.*]] = icmp eq i64 [[IV]], [[LEN33]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY33]], label [[EXIT]], label [[CONT33:%.*]] +; CHECK: cont33: +; CHECK-NEXT: [[EARLY34:%.*]] = icmp eq i64 [[IV]], [[LEN34]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY34]], label [[EXIT]], label [[CONT34:%.*]] +; CHECK: cont34: +; CHECK-NEXT: [[EARLY35:%.*]] = icmp eq i64 [[IV]], [[LEN35]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY35]], label [[EXIT]], label [[CONT35:%.*]] +; CHECK: cont35: +; CHECK-NEXT: [[EARLY36:%.*]] = icmp eq i64 [[IV]], [[LEN36]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY36]], label [[EXIT]], label [[CONT36:%.*]] +; CHECK: cont36: +; CHECK-NEXT: [[EARLY37:%.*]] = icmp eq i64 [[IV]], [[LEN37]] +; CHECK-NEXT: call void @side_effect() +; 
CHECK-NEXT: br i1 [[EARLY37]], label [[EXIT]], label [[CONT37:%.*]] +; CHECK: cont37: +; CHECK-NEXT: [[EARLY38:%.*]] = icmp eq i64 [[IV]], [[LEN38]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY38]], label [[EXIT]], label [[CONT38:%.*]] +; CHECK: cont38: +; CHECK-NEXT: [[EARLY39:%.*]] = icmp eq i64 [[IV]], [[LEN39]] +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 [[EARLY39]], label [[EXIT]], label [[CONT39:%.*]] +; CHECK: cont39: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IV]], 999 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop loop: diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll index 9fcfc7c9b349a..dc0e49efb091f 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll @@ -13,7 +13,7 @@ define void @uadd_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] @@ -42,7 +42,7 @@ define void @sadd_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp 
ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] @@ -71,7 +71,7 @@ define void @usub_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = sub nuw nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] @@ -100,7 +100,7 @@ define void @ssub_sat(ptr %p) { ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[SAT1:%.*]] = sub nsw i32 [[I]], 1 -; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]] +; CHECK-NEXT: store volatile i32 [[SAT1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[END:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll index a1c07b0a24638..1f8bf5fecb248 100644 --- a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll +++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -6,7 +6,7 @@ define i32 @test_01(ptr %p) { ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG0:!range !.*]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -45,7 +45,7 @@ exit: define i32 @test_01_neg(ptr %p) { ; CHECK-LABEL: @test_01_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG0]] +; CHECK-NEXT: [[LEN:%.*]] = load 
i32, ptr [[P:%.*]], align 4, !range [[RNG0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -86,7 +86,7 @@ exit: define i32 @test_02(ptr %p) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG1:!range !.*]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -125,7 +125,7 @@ exit: define i32 @test_02_neg(ptr %p) { ; CHECK-LABEL: @test_02_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG1]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -164,7 +164,7 @@ exit: define i32 @test_03(ptr %p) { ; CHECK-LABEL: @test_03( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG2:!range !.*]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG2:![0-9]+]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] @@ -202,7 +202,7 @@ exit: define i32 @test_04(ptr %p) { ; CHECK-LABEL: @test_04( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, [[RNG2]] +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll 
index 1944a9c800355..5fe72ea0d4fea 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll @@ -230,8 +230,6 @@ exit: ; The control-flow before and after the load of qval shouldn't prevent postindex ; addressing from happening. -; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so -; during codegen we will fail to actually generate a postindex load. define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK-LABEL: define void @middle_block_load( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) { @@ -254,6 +252,7 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK: [[IF_END]]: ; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]] ; CHECK: [[IF_THEN2]]: ; CHECK-NEXT: tail call void @otherfn1() @@ -263,7 +262,6 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK-NEXT: br label %[[FOR_INC]] ; CHECK: [[FOR_INC]]: ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll index de54852313456..7be3a94b90e46 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -265,8 +265,8 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: br label [[EXITING_PROL:%.*]] ; CHECK: exiting.prol: ; CHECK-NEXT: switch i16 [[C3:%.*]], label 
[[DEFAULT_LOOPEXIT_LOOPEXIT1:%.*]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]] -; CHECK-NEXT: i16 95, label [[LATCH_PROL]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH_PROL]] ; CHECK-NEXT: ] ; CHECK: latch.prol: ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 @@ -288,29 +288,29 @@ define void @test4(i16 %c3) { ; CHECK-NEXT: br label [[EXITING:%.*]] ; CHECK: exiting: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT:%.*]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]] -; CHECK-NEXT: i16 95, label [[LATCH:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH:%.*]] ; CHECK-NEXT: ] ; CHECK: latch: ; CHECK-NEXT: br label [[EXITING_1:%.*]] ; CHECK: exiting.1: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]] ; CHECK-NEXT: ] ; CHECK: latch.1: ; CHECK-NEXT: br label [[EXITING_2:%.*]] ; CHECK: exiting.2: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]] ; CHECK-NEXT: ] ; CHECK: latch.2: ; CHECK-NEXT: br label [[EXITING_3:%.*]] ; CHECK: exiting.3: ; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ -; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] -; CHECK-NEXT: i16 95, label [[LATCH_3]] +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_3]] ; CHECK-NEXT: ] ; CHECK: latch.3: ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4 diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll 
b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll index 9ee51cfbcb590..a3d2fcb5ab946 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll @@ -638,18 +638,18 @@ define i32 @test6() #0 { ; CHECK: [[FOR_LATCH]]: ; CHECK-NEXT: br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA:.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[FOR_END_UNR_LCSSA]]: -; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] -; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ] -; CHECK-NEXT: [[P0_UNR_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ] +; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ] ; CHECK-NEXT: br i1 true, label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END:.*]] ; CHECK: [[FOR_OUTER_EPIL_PREHEADER]]: -; CHECK-NEXT: [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR_PH]], %[[FOR_END_UNR_LCSSA]] ] +; CHECK-NEXT: [[P0_EPIL_INIT:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR]], %[[FOR_END_UNR_LCSSA]] ] ; CHECK-NEXT: call void @llvm.assume(i1 true) ; CHECK-NEXT: br label %[[FOR_OUTER_EPIL:.*]] ; CHECK: [[FOR_OUTER_EPIL]]: ; CHECK-NEXT: br label %[[FOR_INNER_EPIL:.*]] ; CHECK: [[FOR_INNER_EPIL]]: -; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_UNR]], %[[FOR_OUTER_EPIL]] ], [ 2, %[[FOR_INNER_EPIL]] ] +; CHECK-NEXT: [[P1_EPIL:%.*]] = phi i32 [ [[P0_EPIL_INIT]], %[[FOR_OUTER_EPIL]] ], [ 2, %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[INC_SINK8_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: [[INC_EPIL]] = add nuw nsw i32 [[INC_SINK8_EPIL]], 1 ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp ne i32 [[INC_EPIL]], 7 @@ -658,8 +658,8 @@ define i32 @test6() #0 { ; CHECK-NEXT: [[DOTLCSSA_EPIL:%.*]] = phi i32 [ [[P1_EPIL]], 
%[[FOR_INNER_EPIL]] ] ; CHECK-NEXT: br label %[[FOR_END]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ] -; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ] +; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ] +; CHECK-NEXT: [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ] ; CHECK-NEXT: ret i32 0 ; entry: @@ -1324,9 +1324,9 @@ define signext i16 @test10(i32 %k) #0 { ; CHECK-NEXT: [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], %[[FOR_INC21_3]] ] ; CHECK-NEXT: br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA:.*]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[FOR_END26_UNR_LCSSA]]: -; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ] -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ] -; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 0, %[[FOR_INC24]] ] +; CHECK-NEXT: [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ] +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ] +; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA_PH:%.*]] = phi i32 [ 0, %[[FOR_INC24]] ] ; CHECK-NEXT: br i1 true, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END26:.*]] ; CHECK: [[FOR_BODY_EPIL_PREHEADER]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) @@ -1353,9 +1353,9 @@ define signext i16 @test10(i32 %k) #0 { ; CHECK-NEXT: [[STOREMERGE_4_LCSSA_EPIL:%.*]] = phi i64 [ [[STOREMERGE_4_EPIL]], %[[FOR_INC21_EPIL]] ] ; CHECK-NEXT: br label %[[FOR_END26]] ; CHECK: [[FOR_END26]]: -; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH_PH]], 
%[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] -; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ] -; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ] +; CHECK-NEXT: [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ] ; CHECK-NEXT: store i64 [[DEC_LCSSA_LCSSA]], ptr @g, align 8 ; CHECK-NEXT: ret i16 0 ; CHECK: [[FOR_BODY2_SPLIT2_1]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 21b21774d18cf..91c65ba8f6267 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -380,7 +380,7 @@ for.end: ret void } -define void @loop_with_freeze_and_conditional_srem(ptr %dst, ptr %keyinfo, ptr %invariant.ptr, i32 %divisor) #1 { +define void @loop_with_freeze_and_conditional_srem(ptr %dst, ptr %keyinfo, ptr %invariant.ptr, i32 %divisor) { ; COMMON-LABEL: define void @loop_with_freeze_and_conditional_srem( ; COMMON-SAME: ptr [[DST:%.*]], ptr [[KEYINFO:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[DIVISOR:%.*]]) { ; COMMON-NEXT: [[ENTRY:.*]]: @@ -433,7 +433,165 @@ exit: ; preds = %loop.latch ret void } +define void @interleave_group(ptr %dst) #1 { +; COST1-LABEL: define void @interleave_group( +; COST1-SAME: ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] { 
+; COST1-NEXT: [[ITER_CHECK:.*:]] +; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; COST1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; COST1: [[VECTOR_PH]]: +; COST1-NEXT: br label %[[VECTOR_BODY:.*]] +; COST1: [[VECTOR_BODY]]: +; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 16 +; COST1-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], 3 +; COST1-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 3 +; COST1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] +; COST1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]] +; COST1-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP3]], align 1 +; COST1-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP4]], align 1 +; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; COST1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 +; COST1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; COST1: [[MIDDLE_BLOCK]]: +; COST1-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; COST1: [[VEC_EPILOG_ITER_CHECK]]: +; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4]] +; COST1: [[VEC_EPILOG_PH]]: +; COST1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; COST1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; COST1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; COST1-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; COST1-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; COST1: [[VEC_EPILOG_VECTOR_BODY]]: +; COST1-NEXT: [[INDEX1:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST1-NEXT: [[TMP6:%.*]] = mul <4 x i64> [[VEC_IND]], splat (i64 3) +; COST1-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; COST1-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; COST1-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; COST1-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; COST1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; COST1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] +; COST1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]] +; COST1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] +; COST1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 2 +; COST1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP12]], i64 2 +; COST1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 2 +; COST1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP14]], i64 2 +; COST1-NEXT: store i8 0, ptr [[TMP15]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP16]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP17]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP18]], align 1 +; COST1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP11]], i64 1 +; COST1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP12]], i64 1 +; COST1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP13]], i64 1 +; COST1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP14]], i64 1 +; COST1-NEXT: store i8 0, ptr [[TMP19]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP20]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP21]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP22]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP11]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP12]], align 1 +; COST1-NEXT: store i8 0, ptr [[TMP13]], align 
1 +; COST1-NEXT: store i8 0, ptr [[TMP14]], align 1 +; COST1-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 +; COST1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; COST1-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100 +; COST1-NEXT: br i1 [[TMP23]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; COST1: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; COST1-NEXT: br i1 false, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; COST1: [[VEC_EPILOG_SCALAR_PH]]: +; +; COST10-LABEL: define void @interleave_group( +; COST10-SAME: ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] { +; COST10-NEXT: [[ITER_CHECK:.*:]] +; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; COST10: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; COST10: [[VECTOR_PH]]: +; COST10-NEXT: br label %[[VECTOR_BODY:.*]] +; COST10: [[VECTOR_BODY]]: +; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST10-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 +; COST10-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] +; COST10-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP1]], align 1 +; COST10-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; COST10-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 +; COST10-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; COST10: [[MIDDLE_BLOCK]]: +; COST10-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; COST10: [[VEC_EPILOG_ITER_CHECK]]: +; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4]] +; COST10: [[VEC_EPILOG_PH]]: +; COST10-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; COST10-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = 
insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; COST10-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; COST10-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], +; COST10-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; COST10: [[VEC_EPILOG_VECTOR_BODY]]: +; COST10-NEXT: [[INDEX1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST10-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST10-NEXT: [[TMP3:%.*]] = mul <4 x i64> [[VEC_IND]], splat (i64 3) +; COST10-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 +; COST10-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 +; COST10-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 +; COST10-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; COST10-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; COST10-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] +; COST10-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; COST10-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; COST10-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 2 +; COST10-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP9]], i64 2 +; COST10-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 2 +; COST10-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 2 +; COST10-NEXT: store i8 0, ptr [[TMP12]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP13]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP14]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP15]], align 1 +; COST10-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP8]], i64 1 +; COST10-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1 +; COST10-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP10]], i64 
1 +; COST10-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP11]], i64 1 +; COST10-NEXT: store i8 0, ptr [[TMP16]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP17]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP18]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP19]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP8]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP9]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP10]], align 1 +; COST10-NEXT: store i8 0, ptr [[TMP11]], align 1 +; COST10-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 +; COST10-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; COST10-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100 +; COST10-NEXT: br i1 [[TMP20]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; COST10: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; COST10-NEXT: br i1 false, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; COST10: [[VEC_EPILOG_SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.3 = mul i64 %iv, 3 + %gep.0 = getelementptr i8, ptr %dst, i64 %iv.3 + %gep.2 = getelementptr i8, ptr %gep.0, i64 2 + store i8 0, ptr %gep.2, align 1 + %gep.1 = getelementptr i8, ptr %gep.0, i64 1 + store i8 0, ptr %gep.1, align 1 + store i8 0, ptr %gep.0, align 1 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) } +attributes #1 = { "target-cpu"="neoverse-512tvb" } declare void @llvm.assume(i1 noundef) declare i64 @llvm.umin.i64(i64, i64) diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index ae772da8862b3..cdbe9bb555834 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ 
b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -755,7 +755,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 12688 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 12696 ; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 12828 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP8]] ; CHECK-NEXT: [[BOUND2:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] @@ -764,88 +764,59 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE9:.*]] ] +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] ; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[GEP_SRC]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[TMP22]], i32 1 ; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 152 ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[GEP_FLAG]], align 4, !alias.scope [[META78:![0-9]+]] ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP27]], i32 0 ; CHECK-NEXT: [[TMP30:%.*]] = 
insertelement <2 x i32> [[TMP29]], i32 [[TMP28]], i32 1 -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq <2 x i32> [[TMP30]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <2 x i32> [[TMP30]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[GEP_SRC]], align 8, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[TMP31]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true) ; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00) -; CHECK-NEXT: [[GEP_DST1_ELSE:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x ptr> poison, ptr [[GEP_DST1_ELSE]], i32 0 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x ptr> [[TMP38]], ptr [[TMP37]], i32 1 -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP33]], i32 0 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x double> [[TMP34]], i32 0 -; CHECK-NEXT: store double [[TMP41]], ptr [[GEP_DST1_ELSE]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] -; CHECK-NEXT: [[GEP_SRC_16:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 16 -; CHECK-NEXT: [[TMP43:%.*]] = load double, ptr [[GEP_SRC_16]], align 8, !alias.scope [[META78]] -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x double> poison, double [[TMP43]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP45:%.*]] = phi <2 x double> [ poison, %[[VECTOR_BODY]] ], [ [[TMP44]], 
%[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i1> [[TMP33]], i32 1 -; CHECK-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]] -; CHECK: [[PRED_LOAD_IF2]]: -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x double> [[TMP34]], i32 1 -; CHECK-NEXT: store double [[TMP47]], ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP22]], i64 16 -; CHECK-NEXT: [[TMP49:%.*]] = load double, ptr [[TMP48]], align 8, !alias.scope [[META78]] -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <2 x double> [[TMP45]], double [[TMP49]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE3]] -; CHECK: [[PRED_LOAD_CONTINUE3]]: -; CHECK-NEXT: [[TMP51:%.*]] = phi <2 x double> [ [[TMP45]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP50]], %[[PRED_LOAD_IF2]] ] -; CHECK-NEXT: [[TMP53:%.*]] = fmul <2 x double> splat (double 2.000000e+01), [[TMP51]] -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <2 x i1> [[TMP33]], i32 0 -; CHECK-NEXT: br i1 [[TMP54]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 +; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[GEP_DST2_ELSE:%.*]] = getelementptr i8, ptr [[GEP_DST1_ELSE]], i64 8 -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[TMP53]], i32 0 -; CHECK-NEXT: store double [[TMP56]], ptr [[GEP_DST2_ELSE]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0 +; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP57:%.*]] = extractelement <2 x i1> 
[[TMP33]], i32 1 -; CHECK-NEXT: br i1 [[TMP57]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 +; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; CHECK: [[PRED_STORE_IF2]]: +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1 +; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] +; CHECK: [[PRED_STORE_CONTINUE3]]: +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] ; CHECK: [[PRED_STORE_IF4]]: -; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[TMP37]], i64 8 -; CHECK-NEXT: [[TMP59:%.*]] = extractelement <2 x double> [[TMP53]], i32 1 -; CHECK-NEXT: store double [[TMP59]], ptr [[TMP58]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] ; CHECK: [[PRED_STORE_CONTINUE5]]: -; CHECK-NEXT: [[TMP60:%.*]] = extractelement <2 x i1> [[TMP31]], i32 0 -; CHECK-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_IF6]]: -; CHECK-NEXT: [[TMP62:%.*]] = getelementptr double, ptr [[DST]], i64 
[[IV]] -; CHECK-NEXT: store double [[TMP13]], ptr [[TMP62]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[TMP62]], i64 8 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP64]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] +; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_CONTINUE7]]: -; CHECK-NEXT: [[TMP66:%.*]] = extractelement <2 x i1> [[TMP31]], i32 1 -; CHECK-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: store double [[TMP14]], ptr [[TMP68]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[TMP68]], i64 8 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP70]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]] @@ -865,6 +836,117 @@ loop: %v.1 = load double, ptr %gep.src, align 8 br i1 %cmp, label %then, label %else +then: + %gep.dst1.then = getelementptr double, ptr %dst, i64 %iv + store double %v.1, ptr %gep.dst1.then, align 8 + %gep.dst2.then = getelementptr i8, ptr %gep.dst1.then, i64 16 + store double 10.0, ptr %gep.dst2.then, align 8 + br label %loop.latch + +else: 
+ %r.1 = fadd double %v.1, 8.0 + %gep.dst1.else = getelementptr double, ptr %dst, i64 %iv + store double %r.1, ptr %gep.dst1.else, align 8 + br label %loop.latch + +loop.latch: + %iv.next = add i64 %iv, 16 + %exit.cond = icmp eq i64 %iv.next, 1600 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + +; Same as @sink_multiple_store_groups_noalias_via_scev, but the offset between +; store groups is only 8, which means the alias across VFs. +define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) { +; CHECK-LABEL: define void @sink_multiple_store_groups_alias_via_scev( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 12688 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 12828 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP3]], i64 152 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[GEP_FLAG]], align 4, !alias.scope [[META85:![0-9]+]] +; 
CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META85]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <2 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[GEP_SRC]], align 8, !alias.scope [[META85]] +; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP3]], align 8, !alias.scope [[META85]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP12]], splat (i1 true) +; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x double> [[TMP16]], splat (double 8.000000e+00) +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0 +; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0 +; CHECK-NEXT: store double [[TMP19]], ptr [[TMP20]], align 8, !alias.scope [[META88:![0-9]+]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1 +; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; CHECK: [[PRED_STORE_IF2]]: +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[TMP18]], i32 1 +; CHECK-NEXT: store double [[TMP22]], ptr [[TMP21]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] +; CHECK: [[PRED_STORE_CONTINUE3]]: +; CHECK-NEXT: [[TMP42:%.*]] = extractelement 
<2 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; CHECK: [[PRED_STORE_IF4]]: +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store double [[TMP13]], ptr [[TMP43]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP43]], i64 8 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP44]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] +; CHECK: [[PRED_STORE_CONTINUE5]]: +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; CHECK-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_IF6]]: +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: store double [[TMP14]], ptr [[TMP46]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_CONTINUE7]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP90:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.src = getelementptr double, ptr %src, i64 %iv + %gep.flag = getelementptr i8, ptr %gep.src, i64 152 + %c = load i32, ptr %gep.flag, align 4 + %cmp = icmp eq i32 %c, 0 + %v.1 = load double, ptr %gep.src, align 8 + br i1 %cmp, label %then, label %else + then: %gep.dst1.then = getelementptr double, ptr %dst, i64 %iv 
store double %v.1, ptr %gep.dst1.then, align 8 @@ -876,11 +958,6 @@ else: %r.1 = fadd double %v.1, 8.0 %gep.dst1.else = getelementptr double, ptr %dst, i64 %iv store double %r.1, ptr %gep.dst1.else, align 8 - %gep.src.16 = getelementptr i8, ptr %gep.src, i64 16 - %v.3 = load double, ptr %gep.src.16, align 8 - %r.2 = fmul double 20.0, %v.3 - %gep.dst2.else = getelementptr i8, ptr %gep.dst1.else, i64 8 - store double %r.2, ptr %gep.dst2.else, align 8 br label %loop.latch loop.latch: @@ -988,20 +1065,20 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META85:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META92:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META88:![0-9]+]], !noalias [[META85]] +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META95:![0-9]+]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] ; CHECK: [[PRED_STORE_IF2]]: ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4, !alias.scope [[META88]], !noalias [[META85]] 
+; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_CONTINUE3]]: ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) @@ -1012,14 +1089,14 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] ; CHECK: [[PRED_STORE_IF4]]: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] ; CHECK: [[PRED_STORE_CONTINUE5]]: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] ; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: store i32 2, ptr [[TMP15]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 2, ptr [[TMP15]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 9) @@ -1028,19 +1105,19 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] ; CHECK: [[PRED_STORE_IF8]]: ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: 
br label %[[PRED_STORE_CONTINUE9]] ; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] ; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: store i32 3, ptr [[TMP21]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: store i32 3, ptr [[TMP21]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] ; CHECK: [[PRED_STORE_CONTINUE11]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP90:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP97:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp b/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp index 7fbccf7160e17..223798342b3ee 100644 --- a/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/InstructionSelectTest.cpp @@ -59,10 +59,8 @@ TEST_F(AArch64GISelMITest, TestInstructionSelectErase) { GTEST_SKIP(); legacy::PassManager PM; - std::unique_ptr TPC(TM->createPassConfig(PM)); EraseMockInstructionSelector ISel; - ISel.TPC = TPC.get(); for (auto &MI : *EntryMBB) { ISel.MIs.push_back(&MI); } diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn index 679373d741661..ac63bbc6ee3b3 100644 --- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn @@ -35,6 +35,7 @@ static_library("Target") { sources = [ "ABI.cpp", 
"AssertFrameRecognizer.cpp", + "BorrowedStackFrame.cpp", "CoreFileMemoryRanges.cpp", "DynamicRegisterInfo.cpp", "ExecutionContext.cpp", diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index 835025d1e319e..980f99687c4cc 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -17,7 +17,6 @@ CodeGen/RISCV/zmmul.ll CodeGen/WebAssembly/memory-interleave.ll CodeGen/X86/AMX/amx-low-intrinsics.ll CodeGen/X86/masked_gather_scatter.ll -CodeGen/X86/nocfivalue.ll DebugInfo/AArch64/ir-outliner.ll DebugInfo/assignment-tracking/X86/hotcoldsplit.ll DebugInfo/Generic/block-asan.ll @@ -148,9 +147,6 @@ Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll Transforms/ExpandMemCmp/AArch64/memcmp.ll Transforms/ExpandMemCmp/X86/memcmp.ll Transforms/ExpandMemCmp/X86/memcmp-x32.ll -Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll -Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll -Transforms/ExpandVariadics/intrinsics.ll Transforms/FixIrreducible/basic.ll Transforms/FixIrreducible/bug45623.ll Transforms/FixIrreducible/callbr.ll @@ -472,9 +468,6 @@ Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll Transforms/LoopIdiom/AArch64/byte-compare-index.ll Transforms/LoopIdiom/AArch64/find-first-byte.ll Transforms/LoopIdiom/RISCV/byte-compare-index.ll -Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll -Transforms/LoopUnroll/peel-last-iteration-with-guards.ll -Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll Transforms/LowerAtomic/atomic-load.ll Transforms/LowerAtomic/atomic-swap.ll Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll @@ -505,41 +498,15 @@ Transforms/LowerSwitch/do-not-handle-impossible-values.ll Transforms/LowerSwitch/feature.ll Transforms/LowerSwitch/fold-popular-case-to-unreachable-default.ll Transforms/LowerSwitch/pr59316.ll -Transforms/LowerTypeTests/aarch64-jumptable.ll -Transforms/LowerTypeTests/blockaddress-2.ll 
-Transforms/LowerTypeTests/blockaddress.ll -Transforms/LowerTypeTests/cfi-annotation.ll Transforms/LowerTypeTests/cfi-coff-comdat-rename.ll -Transforms/LowerTypeTests/cfi-direct-call1.ll -Transforms/LowerTypeTests/cfi-icall-alias.ll -Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll -Transforms/LowerTypeTests/cfi-unwind-direct-call.ll -Transforms/LowerTypeTests/export-alias.ll -Transforms/LowerTypeTests/export-cross-dso-cfi.ll -Transforms/LowerTypeTests/export-icall.ll -Transforms/LowerTypeTests/export-rename-local.ll -Transforms/LowerTypeTests/export-symver.ll -Transforms/LowerTypeTests/function-arm-thumb.ll -Transforms/LowerTypeTests/function-disjoint.ll -Transforms/LowerTypeTests/function-ext.ll Transforms/LowerTypeTests/function.ll -Transforms/LowerTypeTests/function-thumb-bti.ll Transforms/LowerTypeTests/function-weak.ll -Transforms/LowerTypeTests/icall-branch-funnel.ll Transforms/LowerTypeTests/import.ll -Transforms/LowerTypeTests/nocfivalue.ll -Transforms/LowerTypeTests/pr37625.ll -Transforms/LowerTypeTests/section.ll Transforms/LowerTypeTests/simple.ll -Transforms/LowerTypeTests/x86-jumptable.ll -Transforms/MemCpyOpt/memset-memcpy-dbgloc.ll -Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll -Transforms/MemCpyOpt/opaque-ptr.ll Transforms/MergeFunc/2011-02-08-RemoveEqual.ll Transforms/MergeFunc/apply_function_attributes.ll Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll Transforms/MergeFunc/call-and-invoke-with-ranges.ll -Transforms/MergeFunc/cfi-thunk-merging.ll Transforms/MergeFunc/comdat.ll Transforms/MergeFunc/crash-cast-arrays.ll Transforms/MergeFunc/crash.ll @@ -572,10 +539,6 @@ Transforms/MergeFunc/ranges-multiple.ll Transforms/MergeFunc/self-referential-global.ll Transforms/MergeFunc/unnamed-addr-reprocessing.ll Transforms/MergeFunc/vector-GEP-crash.ll -Transforms/MergeICmps/X86/alias-merge-blocks.ll -Transforms/MergeICmps/X86/entry-block-shuffled-2.ll -Transforms/MergeICmps/X86/entry-block-shuffled.ll 
-Transforms/MergeICmps/X86/pr59740.ll Transforms/OpenMP/always_inline_device.ll Transforms/OpenMP/custom_state_machines.ll Transforms/OpenMP/custom_state_machines_remarks.ll diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 3581b07dc4e3e..16eaf28ddd95b 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -1229,15 +1229,13 @@ def AMDGPU_ScaledMFMAOp : def AMDGPU_MakeDmaBaseOp : AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>, - Arguments<(ins - Arg:$src, - Variadic:$src_indices, - Arg:$dst, - Variadic:$dst_indices)>, + Arguments<(ins Arg:$global, + Variadic:$global_indices, + Arg:$lds, + Variadic:$lds_indices)>, Results<(outs AMDGPU_TDMBaseType: $base)> { // TODO: - // * Add verifiers such that one of the memrefs is from LDS and the other global. // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions. let summary = "Pair of based addresses used when moving tiles between LDS and global memory."; @@ -1251,7 +1249,7 @@ def AMDGPU_MakeDmaBaseOp : For example: ```mlir - %base = amdgpu.make_dma_base %src[%idx0], %dst[%idx1] : memref<8xi32>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base + %base = amdgpu.make_dma_base %global[%idx0, %idx1], %lds[%idx2, %idx3] : memref<64x64xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base %descriptor = amdgpu.make_dma_descriptor %base globalSize [2, 2] globalStride [2, 1] sharedSize [2, 2] : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor ``` @@ -1259,27 +1257,31 @@ def AMDGPU_MakeDmaBaseOp : to ```mlir - // pseudocode - %base_0 = llvm.mlir.undef : !llvm.struct<(ptr, ptr)> - %base_1 = llvm.insertvalue %global_addr, %base_0[0] : !llvm.struct<(ptr, ptr)> - %base_2 = llvm.insertvalue %lds_addr, %base_1[1] : !llvm.struct(ptr, ptr)> - // type(%base_2) = !llvm.struct<(ptr, ptr) roughly 
corresponds to amdgpu.tdm_base - - // The base will be used when contructing dgroup0 - // when lowering amdgpu.make_dma_descriptor - %dgroup0_0 = llvm.mlir.undef : !llvm.struct<(....)> - %dgroup0_1 = llvm.insertvalue %base2, %dgroup0_0 : .... - - // When lowering amdgpu.tensor_load_to_lds - rocdl.tensor.load.to.lds %dgroup0, %dgroup1, %dgroup2, %dgroup3 cachepolicy 0 : vector<4xi32>, vector<8xi32> + // pseudo-code + %global_base = llvm.extractvalue %global_memref[1] + %global_address = llvm.get_element_ptr ... + + %lds_base = llvm.extractvalue %lds_memref[1] + %lds_address = llvm.get_element_ptr ... + + // Definition of %base + %undef = llvm.mlir.undef : vector<4xi32> + %v0 = llvm.insertelement %15, %undef[0] : vector<4xi32> + %v1 = llvm.insertelement %lds_address, %v0[1] : vector<4xi32> + %v2 = llvm.insertelement %global_address_low, %v1[2] : vector<4xi32> + %base = llvm.insertelement %global_address_high, %v2[3] : vector<4xi32> + + rocdl.tensor.load.to.lds %base, %dgroup1, %dgroup2, %dgroup3 cachepolicy 0 : vector<4xi32>, vector<8xi32> ``` These tensor DMA operations were introduced in gfx1250. }]; let assemblyFormat = [{ - $src `[` $src_indices `]` `,` $dst `[` $dst_indices `]` attr-dict `:` type($src) `,` type($dst) `->` type(results) + $global `[` $global_indices `]` `,` $lds `[` $lds_indices `]` attr-dict `:` type($global) `,` type($lds) `->` type(results) }]; + + let hasVerifier = 1; } def AMDGPU_MakeDmaDescriptorOp : @@ -1323,12 +1325,12 @@ def AMDGPU_MakeDmaDescriptorOp : ```mlir // Example of moving a two-dimensional tensor to LDS. 
- %base = amdgpu.make_dma_base %src[0, 0], %dst[0, 0] : memref<64x64xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base + %base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<64x64xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base %descriptor = amdgpu.make_dma_descriptor %base globalSize [64, 64] globalStride [64, 1] sharedSize [64, 64] : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor // Example of moving a two dimension tensor to LDS where padding is applied after every integer. - %base = amdgpu.make_dma_base %src[0, 0], %dst[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base + %base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space> -> !amdgpu.tdm_base %descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad pad_every %pad_every) : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor ``` diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index b8317b4a1d2ec..77d1a6f8d53b5 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -3232,6 +3232,18 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> { OptionalAttr:$gangDimDeviceType); let extraClassDeclaration = [{ + // 'create' function to generate an 'empty' routine. 
+ static RoutineOp create(::mlir::OpBuilder & builder, + ::mlir::Location location, + ::llvm::StringRef sym_name, + mlir::SymbolRefAttr func_name, bool implicit) { + return create(builder, location, sym_name, func_name, /*bindIDName=*/{}, + /*bindStrName=*/{}, /*bindIdNameDeviceType=*/{}, + /*bindStrnameDeviceType=*/{}, /*worker=*/{}, /*vector=*/{}, + /*seq=*/{}, /*nohost=*/false, implicit, /*gang=*/{}, + /*gangDim=*/{}, /*gangDimDeviceType=*/{}); + } + static StringRef getGangDimKeyword() { return "dim"; } /// Return true if the op has the worker attribute for the @@ -3267,6 +3279,13 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> { std::optional<::std::variant> getBindNameValue(); std::optional<::std::variant> getBindNameValue(mlir::acc::DeviceType deviceType); + + // Add an entry to the 'seq' attribute for each additional device types. + void addSeq(MLIRContext *, llvm::ArrayRef); + // Add an entry to the 'vector' attribute for each additional device types. + void addVector(MLIRContext *, llvm::ArrayRef); + // Add an entry to the 'worker' attribute for each additional device types. + void addWorker(MLIRContext *, llvm::ArrayRef); }]; let assemblyFormat = [{ diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index d1bbc7f206ce6..3f11bf6fbfce3 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -176,6 +176,50 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { return false; }] >, + InterfaceMethod< + /*description=*/[{ + Generates a load operation from the pointer-like type. This dereferences + the pointer and returns the loaded value. + + The `srcPtr` parameter is the pointer to load from. If the current type is + represented in a way that it does not capture the pointee type, `valueType` + must be passed in to provide the necessary type information. 
+ + Returns the loaded value, or an empty Value if load generation failed. + }], + /*retTy=*/"::mlir::Value", + /*methodName=*/"genLoad", + /*args=*/(ins "::mlir::OpBuilder &":$builder, + "::mlir::Location":$loc, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$srcPtr, + "::mlir::Type":$valueType), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return {}; + }] + >, + InterfaceMethod< + /*description=*/[{ + Generates a store operation to the pointer-like type. This stores a value + to the memory location pointed to by the pointer. + + The `destPtr` parameter is the pointer to store to. The `valueToStore` + parameter is the value to be stored. The type information is derived from + the valueToStore parameter itself. + + Returns true if store was successfully generated, false otherwise. + }], + /*retTy=*/"bool", + /*methodName=*/"genStore", + /*args=*/(ins "::mlir::OpBuilder &":$builder, + "::mlir::Location":$loc, + "::mlir::Value":$valueToStore, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$destPtr), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return false; + }] + >, ]; } diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index b9a5e7d7f6eac..2b6938712dad2 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -2264,6 +2264,77 @@ struct AMDGPUPermlaneLowering : public ConvertOpToLLVMPattern { } }; +struct AMDGPUMakeDmaBaseLowering + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + AMDGPUMakeDmaBaseLowering(const LLVMTypeConverter &converter, Chipset chipset) + : ConvertOpToLLVMPattern(converter), chipset(chipset) {} + Chipset chipset; + + LogicalResult + matchAndRewrite(MakeDmaBaseOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (chipset < kGfx1250) + return op->emitOpError("make_dma_base is only supported on gfx1250"); 
+ + Location loc = op.getLoc(); + + ValueRange ldsIndices = adaptor.getLdsIndices(); + Value lds = adaptor.getLds(); + auto ldsMemRefType = cast(op.getLds().getType()); + + Value ldsPtr = + getStridedElementPtr(rewriter, loc, ldsMemRefType, lds, ldsIndices); + + ValueRange globalIndices = adaptor.getGlobalIndices(); + Value global = adaptor.getGlobal(); + auto globalMemRefType = cast(op.getGlobal().getType()); + + Value globalPtr = getStridedElementPtr(rewriter, loc, globalMemRefType, + global, globalIndices); + + Type i32 = rewriter.getI32Type(); + Type i64 = rewriter.getI64Type(); + + Value castForLdsAddr = LLVM::PtrToIntOp::create(rewriter, loc, i32, ldsPtr); + Value castForGlobalAddr = + LLVM::PtrToIntOp::create(rewriter, loc, i64, globalPtr); + + Value lowHalf = + LLVM::TruncOp::create(rewriter, loc, i32, castForGlobalAddr); + + Value shift = LLVM::LShrOp::create(rewriter, loc, castForGlobalAddr, + createI64Constant(rewriter, loc, 32)); + + Value highHalf = LLVM::TruncOp::create(rewriter, loc, i32, shift); + + Value mask = createI32Constant(rewriter, loc, (1ull << 25) - 1); + Value validHighHalf = LLVM::AndOp::create(rewriter, loc, highHalf, mask); + + Value typeField = createI32Constant(rewriter, loc, 2 << 30); + Value highHalfPlusType = + LLVM::OrOp::create(rewriter, loc, validHighHalf, typeField); + + Value c0 = createI32Constant(rewriter, loc, 0); + Value c1 = createI32Constant(rewriter, loc, 1); + Value c2 = createI32Constant(rewriter, loc, 2); + Value c3 = createI32Constant(rewriter, loc, 3); + + Type v4i32 = this->typeConverter->convertType(VectorType::get(4, i32)); + Value result = LLVM::PoisonOp::create(rewriter, loc, v4i32); + result = LLVM::InsertElementOp::create(rewriter, loc, result, c1, c0); + result = LLVM::InsertElementOp::create(rewriter, loc, result, + castForLdsAddr, c1); + result = LLVM::InsertElementOp::create(rewriter, loc, result, lowHalf, c2); + result = LLVM::InsertElementOp::create(rewriter, loc, result, + highHalfPlusType, c3); + + 
rewriter.replaceOp(op, result); + return success(); + } +}; + struct ConvertAMDGPUToROCDLPass : public impl::ConvertAMDGPUToROCDLPassBase { using Base::Base; @@ -2278,6 +2349,10 @@ struct ConvertAMDGPUToROCDLPass RewritePatternSet patterns(ctx); LLVMTypeConverter converter(ctx); + converter.addConversion([&](TDMBaseType type) -> Type { + Type i32 = IntegerType::get(type.getContext(), 32); + return converter.convertType(VectorType::get(4, i32)); + }); populateAMDGPUToROCDLConversionPatterns(converter, patterns, *maybeChipset); LLVMConversionTarget target(getContext()); target.addIllegalDialect<::mlir::amdgpu::AMDGPUDialect>(); @@ -2333,6 +2408,7 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter, ScaledExtPackedOpLowering, PackedScaledTruncOpLowering, PackedTrunc2xFp8OpLowering, PackedStochRoundFp8OpLowering, GatherToLDSOpLowering, TransposeLoadOpLowering, - AMDGPUPermlaneLowering>(converter, chipset); + AMDGPUPermlaneLowering, AMDGPUMakeDmaBaseLowering>(converter, + chipset); patterns.add(converter); } diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index 93cb9b38a5ecf..8b58c3b1dd182 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -705,6 +705,24 @@ LogicalResult TransposeLoadOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// MakeDmaBaseOp +//===----------------------------------------------------------------------===// + +LogicalResult MakeDmaBaseOp::verify() { + MemRefType ldsType = cast(getLds().getType()); + MemRefType globalType = cast(getGlobal().getType()); + if (!hasWorkgroupMemorySpace(ldsType.getMemorySpace())) { + return emitOpError( + "lds memref must have workgroup address space attribute."); + } + if (!hasGlobalMemorySpace(globalType.getMemorySpace())) { + return emitOpError( + "global memref must have global address space 
attribute."); + } + return success(); +} + //===----------------------------------------------------------------------===// // MakeDmaDescriptorOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp index f15c63c166e0a..89ef51f922cad 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/MaskedloadToLoad.cpp @@ -33,19 +33,18 @@ using namespace mlir::amdgpu; /// This pattern supports lowering of: `vector.maskedload` to `vector.load` /// and `arith.select` if the memref is in buffer address space. -static LogicalResult baseInBufferAddrSpace(PatternRewriter &rewriter, - vector::MaskedLoadOp maskedOp) { - auto memRefType = dyn_cast(maskedOp.getBase().getType()); +static LogicalResult hasBufferAddressSpace(Type type) { + auto memRefType = dyn_cast(type); if (!memRefType) - return rewriter.notifyMatchFailure(maskedOp, "not a memref source"); + return failure(); Attribute addrSpace = memRefType.getMemorySpace(); if (!isa_and_nonnull(addrSpace)) - return rewriter.notifyMatchFailure(maskedOp, "no address space"); + return failure(); if (dyn_cast(addrSpace).getValue() != amdgpu::AddressSpace::FatRawBuffer) - return rewriter.notifyMatchFailure(maskedOp, "not in buffer address space"); + return failure(); return success(); } @@ -83,10 +82,11 @@ struct MaskedLoadLowering final : OpRewritePattern { LogicalResult matchAndRewrite(vector::MaskedLoadOp maskedOp, PatternRewriter &rewriter) const override { if (maskedOp->hasAttr(kMaskedloadNeedsMask)) - return failure(); + return rewriter.notifyMatchFailure(maskedOp, "already rewritten"); - if (failed(baseInBufferAddrSpace(rewriter, maskedOp))) { - return failure(); + if (failed(hasBufferAddressSpace(maskedOp.getBase().getType()))) { + return rewriter.notifyMatchFailure( + maskedOp, "isn't a load from a fat buffer resource"); } 
// Check if this is either a full inbounds load or an empty, oob load. If @@ -176,9 +176,14 @@ struct FullMaskedLoadToConditionalLoad LogicalResult matchAndRewrite(vector::MaskedLoadOp loadOp, PatternRewriter &rewriter) const override { + if (succeeded(hasBufferAddressSpace(loadOp.getBase().getType()))) + return rewriter.notifyMatchFailure( + loadOp, "buffer loads are handled by a more specialized pattern"); + FailureOr maybeCond = matchFullMask(rewriter, loadOp.getMask()); if (failed(maybeCond)) { - return failure(); + return rewriter.notifyMatchFailure(loadOp, + "isn't loading a broadcasted scalar"); } Value cond = maybeCond.value(); @@ -203,6 +208,15 @@ struct FullMaskedStoreToConditionalStore LogicalResult matchAndRewrite(vector::MaskedStoreOp storeOp, PatternRewriter &rewriter) const override { + // A condition-free implementation of fully masked stores requires + // 1) an accessor for the num_records field on buffer resources/fat pointers + // 2) knowledge that said field will always be set accurately - that is, + // that writes to x < num_records of offset wouldn't trap, which is + // something a pattern user would need to assert or we'd need to prove. + // + // Therefore, conditional stores to buffers still go down this path at + // present. + FailureOr maybeCond = matchFullMask(rewriter, storeOp.getMask()); if (failed(maybeCond)) { return failure(); diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 841d1d781f1a1..7039bbe1d11ec 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -203,12 +203,68 @@ struct MemRefPointerLikeModel return false; } + + mlir::Value genLoad(Type pointer, OpBuilder &builder, Location loc, + TypedValue srcPtr, + Type valueType) const { + // Load from a memref - only valid for scalar memrefs (rank 0). 
+ // This is because the address computation for memrefs is part of the load + // (and not computed separately), but the API does not have arguments for + // indexing. + auto memrefValue = dyn_cast_if_present>(srcPtr); + if (!memrefValue) + return {}; + + auto memrefTy = memrefValue.getType(); + + // Only load from scalar memrefs (rank 0) + if (memrefTy.getRank() != 0) + return {}; + + return memref::LoadOp::create(builder, loc, memrefValue); + } + + bool genStore(Type pointer, OpBuilder &builder, Location loc, + Value valueToStore, TypedValue destPtr) const { + // Store to a memref - only valid for scalar memrefs (rank 0) + // This is because the address computation for memrefs is part of the store + // (and not computed separately), but the API does not have arguments for + // indexing. + auto memrefValue = dyn_cast_if_present>(destPtr); + if (!memrefValue) + return false; + + auto memrefTy = memrefValue.getType(); + + // Only store to scalar memrefs (rank 0) + if (memrefTy.getRank() != 0) + return false; + + memref::StoreOp::create(builder, loc, valueToStore, memrefValue); + return true; + } }; struct LLVMPointerPointerLikeModel : public PointerLikeType::ExternalModel { Type getElementType(Type pointer) const { return Type(); } + + mlir::Value genLoad(Type pointer, OpBuilder &builder, Location loc, + TypedValue srcPtr, + Type valueType) const { + // For LLVM pointers, we need the valueType to determine what to load + if (!valueType) + return {}; + + return LLVM::LoadOp::create(builder, loc, valueType, srcPtr); + } + + bool genStore(Type pointer, OpBuilder &builder, Location loc, + Value valueToStore, TypedValue destPtr) const { + LLVM::StoreOp::create(builder, loc, valueToStore, destPtr); + return true; + } }; struct MemrefAddressOfGlobalModel @@ -4293,6 +4349,24 @@ RoutineOp::getGangDimValue(mlir::acc::DeviceType deviceType) { return std::nullopt; } +void RoutineOp::addSeq(MLIRContext *context, + llvm::ArrayRef effectiveDeviceTypes) { + 
setSeqAttr(addDeviceTypeAffectedOperandHelper(context, getSeqAttr(), + effectiveDeviceTypes)); +} + +void RoutineOp::addVector(MLIRContext *context, + llvm::ArrayRef effectiveDeviceTypes) { + setVectorAttr(addDeviceTypeAffectedOperandHelper(context, getVectorAttr(), + effectiveDeviceTypes)); +} + +void RoutineOp::addWorker(MLIRContext *context, + llvm::ArrayRef effectiveDeviceTypes) { + setWorkerAttr(addDeviceTypeAffectedOperandHelper(context, getWorkerAttr(), + effectiveDeviceTypes)); +} + //===----------------------------------------------------------------------===// // InitOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp index f2b0e71c9397f..59a1ad9dbe189 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp @@ -517,8 +517,7 @@ void LayoutInfoPropagation::visitPrefetchNdOp( auto [bWidth, bHeight, bCount] = blockWHC.value(); SmallVector instData; int instWidth = xegpu::getLargestDivisor( - static_cast(tdescTy.getDimSize(tdescTy.getRank() - 1)), bWidth, - bCount); + static_cast(tdescTy.getDimSize(tdescTy.getRank() - 1)), bWidth); if (instWidth == -1) prefetch.emitWarning( "No suitable instruction multiple found for the given shape."); @@ -759,8 +758,7 @@ void LayoutInfoPropagation::visitStoreNdOp( auto [bWidth, bHeight, bCount] = blockWHC.value(); SmallVector instData; int instWidth = xegpu::getLargestDivisor( - static_cast(dataTy.getDimSize(dataTy.getRank() - 1)), bWidth, - bCount); + static_cast(dataTy.getDimSize(dataTy.getRank() - 1)), bWidth); if (instWidth == -1) store.emitWarning( "No suitable instruction multiple found for the given shape."); diff --git a/mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir similarity index 81% rename from 
mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir rename to mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir index d2391140ce056..27daea58f8f92 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/cvt_scale_pk-gfx1250.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir @@ -162,3 +162,51 @@ func.func @amdgpu.scaled_ext_packed816_invalid_dst_elem_type(%v: vector<16xf6E3M %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xf64> return %ret0: vector<16xf64> } + +// ----- + +#gpu_global_addrspace = 1 +#gpu_lds_addrspace = 3 +#amdgpu_fat_buffer_addrspace = 7 + +// CHECK-LABEL: func @make_dma_base +// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32, 1>, %[[SMEM:.+]]: memref<8xi32, 3>) +func.func @make_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_addrspace>, %smem: memref<8xi32,#gpu_lds_addrspace>) -> (!amdgpu.tdm_base) { + // CHECK-DAG: %[[INT:.+]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64 + // CHECK-DAG: %[[MEMREF_DESC_MEM:.+]] = builtin.unrealized_conversion_cast %[[MEM]] : memref<8xi32, 1> + // CHECK-DAG: %[[MEMREF_DESC_SMEM:.+]] = builtin.unrealized_conversion_cast %[[SMEM]] : memref<8xi32, 3> + + // CHECK-DAG: %[[MEM_BASE_PTR:.+]] = llvm.extractvalue %[[MEMREF_DESC_MEM]][1] : !llvm.struct<(ptr<1> + // CHECK-DAG: %[[SMEM_BASE_PTR:.+]] = llvm.extractvalue %[[MEMREF_DESC_SMEM]][1] : !llvm.struct<(ptr<3> + + // CHECK-DAG: %[[MEM_BASE_OFFSET:.+]] = llvm.getelementptr %[[MEM_BASE_PTR]][%[[INT]]] + // CHECK-DAG: %[[SMEM_BASE_OFFSET:.+]] = llvm.getelementptr %[[SMEM_BASE_PTR]][%[[INT]]] + + // CHECK-DAG: %[[MEM_INT:.+]] = llvm.ptrtoint %[[MEM_BASE_OFFSET]] : !llvm.ptr<1> to i64 + // CHECK-DAG: %[[SMEM_INT:.+]] = llvm.ptrtoint %[[SMEM_BASE_OFFSET]] : !llvm.ptr<3> to i32 + + // CHECK: %[[MEM_INT_LOW:.+]] = llvm.trunc %[[MEM_INT]] : i64 to i32 + // CHECK-DAG: %[[SHIFT:.+]] = llvm.mlir.constant(32 : i64) + // CHECK: 
%[[SHIFTED_MEM_INT:.+]] = llvm.lshr %[[MEM_INT]], %[[SHIFT]] + // CHECK: %[[MEM_INT_HIGH:.+]] = llvm.trunc %[[SHIFTED_MEM_INT]] : i64 to i32 + // CHECK-DAG: %[[MASK:.+]] = llvm.mlir.constant(33554431 : i32) + // CHECK: %[[VALID_MEM_INT_HIGH:.+]] = llvm.and %[[MEM_INT_HIGH]], %[[MASK]] + + // CHECK-DAG: %[[TYPE_FIELD:.+]] = llvm.mlir.constant(-2147483648 : i32) + // CHECK: %[[MEM_INT_HIGH_TYPE:.+]] = llvm.or %[[VALID_MEM_INT_HIGH]], %[[TYPE_FIELD]] + + // CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(2 : i32) : i32 + // CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(3 : i32) : i32 + + // CHECK: %[[V4I32_0_0:.+]] = llvm.mlir.poison : vector<4xi32> + // CHECK: %[[V4I32_0_1:.+]] = llvm.insertelement %[[C1]], %[[V4I32_0_0]][%[[C0]] : i32] + // CHECK: %[[V4I32_0_2:.+]] = llvm.insertelement %[[SMEM_INT]], %[[V4I32_0_1]][%[[C1]] : i32] + // CHECK: %[[V4I32_0_3:.+]] = llvm.insertelement %[[MEM_INT_LOW]], %[[V4I32_0_2]][%[[C2]] : i32] + // CHECK: %[[V4I32_0_4:.+]] = llvm.insertelement %[[MEM_INT_HIGH_TYPE]], %[[V4I32_0_3]][%[[C3]] : i32] + + %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu_global_addrspace>, memref<8xi32, #gpu_lds_addrspace> -> !amdgpu.tdm_base + + func.return %0 : !amdgpu.tdm_base +} diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir index 5b3a79d14cb1a..b915bfa324c77 100644 --- a/mlir/test/Dialect/AMDGPU/invalid.mlir +++ b/mlir/test/Dialect/AMDGPU/invalid.mlir @@ -357,6 +357,20 @@ func.func @scaled_mfma_invalid_k(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32x // ----- +func.func @make_dma_base_invalid_addressspace(%idx: index, %mem: memref<8xi32>) { + // expected-error@+1 {{'amdgpu.make_dma_base' op lds memref must have workgroup address space attribute.}} + amdgpu.make_dma_base %mem[%idx], %mem[%idx] : memref<8xi32>, memref<8xi32> -> !amdgpu.tdm_base +} + +// ----- + 
+func.func @make_dma_base_invalid_addressspace(%idx: index, %smem : memref<8xi32, #gpu.address_space>) { + // expected-error@+1 {{'amdgpu.make_dma_base' op global memref must have global address space attribute.}} + amdgpu.make_dma_base %smem[%idx], %smem[%idx] : memref<8xi32, #gpu.address_space>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base +} + +// ----- + func.func @make_dma_base_invalid_barrier(%base: !amdgpu.tdm_base, %barrier: memref<8xi32>, %idx: index) { // expected-error@+1 {{'amdgpu.make_dma_descriptor' op atomic barrier address must be in LDS.}} amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1] sharedSize [0] atomicBarrier(%barrier[%idx] : memref<8xi32>) : !amdgpu.tdm_base -> !amdgpu.tdm_descriptor diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir index 390ad8cb8c1a5..3260bd4a8df9a 100644 --- a/mlir/test/Dialect/AMDGPU/ops.mlir +++ b/mlir/test/Dialect/AMDGPU/ops.mlir @@ -691,9 +691,6 @@ func.func @memory_counter_wait() { func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32, #gpu.address_space>) { // CHECK: amdgpu.make_dma_base %[[MEM]][%[[IDX]]], %[[SMEM]][%[[IDX]]] : memref<8xi32>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32>, memref<8xi32, #gpu.address_space> -> !amdgpu.tdm_base - - // CHECK: amdgpu.make_dma_base %[[SMEM]][%[[IDX]]], %[[MEM]][%[[IDX]]] : memref<8xi32, #gpu.address_space>, memref<8xi32> -> !amdgpu.tdm_base - amdgpu.make_dma_base %smem[%idx], %mem[%idx] : memref<8xi32, #gpu.address_space>, memref<8xi32> -> !amdgpu.tdm_base func.return } @@ -748,3 +745,4 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base, %barrier: memref<8x func.return } + diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-load.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-load.mlir new file mode 100644 index 0000000000000..36df6a1d1bbe3 --- /dev/null +++ 
b/mlir/test/Dialect/OpenACC/pointer-like-interface-load.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=load}))" 2>&1 | FileCheck %s + +func.func @test_memref_load_scalar() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated load for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Loaded value type: f32 + // CHECK: Generated: %{{.*}} = memref.load %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_load_int() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated load for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Loaded value type: i64 + // CHECK: Generated: %{{.*}} = memref.load %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_load_dynamic() { + %c10 = arith.constant 10 : index + %ptr = memref.alloc(%c10) {test.ptr} : memref + // CHECK: Failed to generate load for operation: %[[PTR:.*]] = memref.alloc(%{{.*}}) {test.ptr} : memref + return +} + diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-store.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-store.mlir new file mode 100644 index 0000000000000..0fee43102d6d9 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/pointer-like-interface-store.mlir @@ -0,0 +1,39 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=store}))" 2>&1 | FileCheck %s + +func.func @test_memref_store_scalar() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated store for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Generated: %[[VAL:.*]] = arith.constant 4.200000e+01 : f32 + // CHECK: Generated: memref.store %[[VAL]], %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_store_int() { + %ptr = memref.alloca() {test.ptr} : memref + // 
CHECK: Successfully generated store for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i32 + // CHECK: Generated: memref.store %[[VAL]], %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_store_i64() { + %ptr = memref.alloca() {test.ptr} : memref + // CHECK: Successfully generated store for operation: %[[PTR:.*]] = memref.alloca() {test.ptr} : memref + // CHECK: Generated: %[[VAL:.*]] = arith.constant 42 : i64 + // CHECK: Generated: memref.store %[[VAL]], %[[PTR]][] : memref + return +} + +// ----- + +func.func @test_memref_store_dynamic() { + %c10 = arith.constant 10 : index + %ptr = memref.alloc(%c10) {test.ptr} : memref + // CHECK: Failed to generate store for operation: %[[PTR:.*]] = memref.alloc(%{{.*}}) {test.ptr} : memref + return +} + diff --git a/mlir/test/Dialect/Vector/vector-sink.mlir b/mlir/test/Dialect/Vector/vector-sink.mlir index 577b06df42929..beaba52af1841 100644 --- a/mlir/test/Dialect/Vector/vector-sink.mlir +++ b/mlir/test/Dialect/Vector/vector-sink.mlir @@ -780,7 +780,7 @@ func.func @negative_extract_load_scalable(%arg0: memref, %arg1: index) -> } //----------------------------------------------------------------------------- -// [Pattern: StoreOpFromSplatOrBroadcast] +// [Pattern: StoreOpFromBroadcast] //----------------------------------------------------------------------------- // CHECK-LABEL: @store_splat diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir index d911baa49acbb..32fb3178a8af2 100644 --- a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir @@ -6,6 +6,8 @@ // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32> // CHECK: %[[TDESC_SRC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> // CHECK: %[[TDESC_DST:.*]] = 
xegpu.create_nd_tdesc %[[ARG1]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +// CHECK: xegpu.prefetch_nd %[[TDESC_SRC]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> // CHECK: %[[LOADED:.*]] = xegpu.load_nd %0 <{layout = #xegpu.layout}> {layout_result_0 = #xegpu.layout} : // CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -> vector<8x32xf32> // CHECK: xegpu.store_nd %[[LOADED]], %[[TDESC_DST]] <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout> @@ -16,6 +18,7 @@ func.func @load_store_no_array_len(%arg0: memref<8x32xf32>, %arg1: memref<8x32xf %cst = arith.constant dense<0.000000e+00> : vector<8x16xf32> %0 = xegpu.create_nd_tdesc %arg0 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> %1 = xegpu.create_nd_tdesc %arg1 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> + xegpu.prefetch_nd %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<8x32xf32> %2 = xegpu.load_nd %0 : !xegpu.tensor_desc<8x32xf32> -> vector<8x32xf32> xegpu.store_nd %2, %1 : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32> return diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp index 027b0a1a8b80b..3ff0dc85b2152 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp @@ -46,7 +46,7 @@ struct TestPointerLikeTypeInterfacePass Pass::Option testMode{ *this, "test-mode", - llvm::cl::desc("Test mode: walk, alloc, copy, or free"), + llvm::cl::desc("Test mode: walk, alloc, copy, free, load, or store"), llvm::cl::init("walk")}; StringRef getArgument() const override { @@ -75,6 +75,10 @@ struct TestPointerLikeTypeInterfacePass void testGenCopy(Operation *srcOp, Operation *destOp, Value srcResult, Value destResult, PointerLikeType 
pointerType, OpBuilder &builder); + void testGenLoad(Operation *op, Value result, PointerLikeType pointerType, + OpBuilder &builder); + void testGenStore(Operation *op, Value result, PointerLikeType pointerType, + OpBuilder &builder, Value providedValue = {}); struct PointerCandidate { Operation *op; @@ -92,9 +96,12 @@ void TestPointerLikeTypeInterfacePass::runOnOperation() { auto func = getOperation(); OpBuilder builder(&getContext()); - if (testMode == "alloc" || testMode == "free") { + if (testMode == "alloc" || testMode == "free" || testMode == "load" || + testMode == "store") { // Collect all candidates first SmallVector candidates; + // For store mode, also look for a test value to use + Value testValue; func.walk([&](Operation *op) { if (op->hasAttr("test.ptr")) { for (auto result : op->getResults()) { @@ -105,6 +112,11 @@ void TestPointerLikeTypeInterfacePass::runOnOperation() { } } } + // Collect value marked with test.value for store tests + if (testMode == "store" && op->hasAttr("test.value")) { + if (op->getNumResults() > 0) + testValue = op->getResult(0); + } }); // Now test all candidates @@ -115,6 +127,12 @@ void TestPointerLikeTypeInterfacePass::runOnOperation() { else if (testMode == "free") testGenFree(candidate.op, candidate.result, candidate.pointerType, builder); + else if (testMode == "load") + testGenLoad(candidate.op, candidate.result, candidate.pointerType, + builder); + else if (testMode == "store") + testGenStore(candidate.op, candidate.result, candidate.pointerType, + builder, testValue); } } else if (testMode == "copy") { // Collect all source and destination candidates @@ -292,6 +310,105 @@ void TestPointerLikeTypeInterfacePass::testGenCopy( } } +void TestPointerLikeTypeInterfacePass::testGenLoad(Operation *op, Value result, + PointerLikeType pointerType, + OpBuilder &builder) { + Location loc = op->getLoc(); + + // Create a new builder with the listener and set insertion point + OperationTracker tracker; + OpBuilder 
newBuilder(op->getContext()); + newBuilder.setListener(&tracker); + newBuilder.setInsertionPointAfter(op); + + // Call the genLoad API + auto typedResult = cast>(result); + Value loadRes = pointerType.genLoad(newBuilder, loc, typedResult, Type()); + + if (loadRes) { + llvm::errs() << "Successfully generated load for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + llvm::errs() << "\tLoaded value type: "; + loadRes.getType().print(llvm::errs()); + llvm::errs() << "\n"; + + // Print all operations that were inserted + for (Operation *insertedOp : tracker.insertedOps) { + llvm::errs() << "\tGenerated: "; + insertedOp->print(llvm::errs()); + llvm::errs() << "\n"; + } + } else { + llvm::errs() << "Failed to generate load for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + } +} + +void TestPointerLikeTypeInterfacePass::testGenStore(Operation *op, Value result, + PointerLikeType pointerType, + OpBuilder &builder, + Value providedValue) { + Location loc = op->getLoc(); + + // Create a new builder with the listener and set insertion point + OperationTracker tracker; + OpBuilder newBuilder(op->getContext()); + newBuilder.setListener(&tracker); + newBuilder.setInsertionPointAfter(op); + + // Use provided value if available, otherwise create a constant + Value valueToStore = providedValue; + if (!valueToStore) { + // Create a test value to store - use a constant matching the element type + Type elementType = pointerType.getElementType(); + if (!elementType) { + llvm::errs() << "Failed to generate store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + return; + } + + if (elementType.isIntOrIndex()) { + auto attr = newBuilder.getIntegerAttr(elementType, 42); + valueToStore = + arith::ConstantOp::create(newBuilder, loc, elementType, attr); + } else if (auto floatType = dyn_cast(elementType)) { + auto attr = newBuilder.getFloatAttr(floatType, 42.0); + valueToStore = + arith::ConstantOp::create(newBuilder, loc, floatType, 
attr); + } else { + llvm::errs() << "Failed to generate store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + return; + } + } + + // Call the genStore API + auto typedResult = cast>(result); + bool success = + pointerType.genStore(newBuilder, loc, valueToStore, typedResult); + + if (success) { + llvm::errs() << "Successfully generated store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + + // Print all operations that were inserted + for (Operation *insertedOp : tracker.insertedOps) { + llvm::errs() << "\tGenerated: "; + insertedOp->print(llvm::errs()); + llvm::errs() << "\n"; + } + } else { + llvm::errs() << "Failed to generate store for operation: "; + op->print(llvm::errs()); + llvm::errs() << "\n"; + } +} + } // namespace //===----------------------------------------------------------------------===// diff --git a/revert_patches.txt b/revert_patches.txt index f4ec0a3444c46..9e465ba90ae6a 100644 --- a/revert_patches.txt +++ b/revert_patches.txt @@ -5,6 +5,3 @@ d57230c7 [AMDGPU][MC] Disallow op_sel in some VOP3P dot instructions (#100485) breaks build of ROCmValidationSuite [C2y] Support WG14 N3457, the __COUNTER__ macro (#162662) --- -breaks rocRAND -[CUDA][HIP] Fix CTAD for host/device constructors (#168711) ---- diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel index da39e58ac70ed..7dc16674b979f 100644 --- a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -2091,6 +2091,7 @@ cc_library( "//lldb:Target", "//lldb:TargetHeaders", "//lldb:Utility", + "//llvm:Support", ], ) @@ -2142,11 +2143,14 @@ cc_library( ":PluginObjectFilePlaceholder", ":PluginProcessUtility", "//lldb:Core", + "//lldb:CoreHeaders", "//lldb:Host", "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", "//lldb:Target", "//lldb:TargetHeaders", "//lldb:Utility", + 
"//llvm:Support", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 1428299076fb3..8e9b51b58f4f5 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -1243,42 +1243,12 @@ cc_library( ], ) -AnalysisFpExcSrcs = [ - "lib/Analysis/ConstantFolding.cpp", -] - -cc_library( - name = "AnalysisFpExc", - srcs = AnalysisFpExcSrcs, - hdrs = glob( - [ - "include/llvm/Analysis/*.h", - "include/llvm/Analysis/Utils/*.h", - ], - ), - copts = llvm_copts + ["-ftrapping-math"], - textual_hdrs = glob([ - "include/llvm/Analysis/*.def", - ]), - deps = [ - ":BinaryFormat", - ":Core", - ":Object", - ":ProfileData", - ":Support", - ":TargetParser", - ":config", - ":target_library_info_gen", - ], -) - cc_library( name = "Analysis", srcs = glob( [ "lib/Analysis/*.cpp", ], - exclude = AnalysisFpExcSrcs, ), hdrs = glob( [ @@ -1288,12 +1258,11 @@ cc_library( ) + [ "include/llvm-c/Analysis.h", ], - copts = llvm_copts, + copts = llvm_copts + ["-ftrapping-math"], textual_hdrs = glob([ "include/llvm/Analysis/*.def", ]), deps = [ - ":AnalysisFpExc", ":BinaryFormat", ":Core", ":FrontendHLSL",